-
Notifications
You must be signed in to change notification settings - Fork 268
Fix DiffID computation to use uncompressed layer digest #587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fc269d0
caaab25
e673719
7dfaef6
55a16d7
50b1194
13e26ba
b02f4fe
c8c0e85
cadb68d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,10 +14,12 @@ | |
| // limitations under the License. | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| import Compression | ||
| import ContainerizationError | ||
| import Crypto | ||
| import Foundation | ||
| import NIOCore | ||
| import zlib | ||
|
|
||
| /// Provides a context to write data into a directory. | ||
| public class ContentWriter { | ||
|
|
@@ -126,6 +128,176 @@ public class ContentWriter { | |
| return (totalSize, digest) | ||
| } | ||
|
|
||
/// Computes the SHA256 digest of the uncompressed content of a gzip file.
///
/// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
/// uncompressed layer content. This method streams the compressed file in
/// chunks, decompresses through Apple's Compression framework, and feeds
/// each decompressed chunk into an incremental SHA256 hasher. Neither the
/// full compressed nor the full decompressed data is held in memory.
///
/// Limitations: only single-member gzip archives are supported, and the
/// gzip header (including any FNAME/FCOMMENT fields) must fit within the
/// first 512 bytes of the file.
///
/// - Parameter url: The URL of the gzip-compressed file.
/// - Returns: The SHA256 digest of the uncompressed content.
/// - Throws: `ContainerizationError` if the file is not valid gzip, the
///   deflate stream is corrupt, or the trailer CRC32/ISIZE do not match.
public static func diffID(of url: URL) throws -> SHA256.Digest {
    let fileHandle = try FileHandle(forReadingFrom: url)
    defer { fileHandle.closeFile() }

    // Read just enough to parse the gzip header; headers that do not fit in
    // the first 512 bytes are rejected by gzipHeaderSize.
    let headerReadSize = 512
    guard let headerData = Self.readExactly(fileHandle: fileHandle, count: headerReadSize) else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }
    let headerSize = try Self.gzipHeaderSize(headerData)

    // Read the gzip trailer (last 8 bytes) so the decompressed stream can be
    // validated afterwards. Seek to the end to learn the file size first.
    fileHandle.seekToEndOfFile()
    let fileSize = fileHandle.offsetInFile
    guard fileSize >= 8 else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }
    fileHandle.seek(toFileOffset: fileSize - 8)
    guard let trailerData = Self.readExactly(fileHandle: fileHandle, count: 8),
        trailerData.count == 8
    else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }
    // RFC 1952: both trailer fields (CRC32, then ISIZE) are little-endian.
    func readLE32(_ data: Data, _ offset: Int) -> UInt32 {
        let base = data.startIndex + offset
        return UInt32(data[base])
            | (UInt32(data[base + 1]) << 8)
            | (UInt32(data[base + 2]) << 16)
            | (UInt32(data[base + 3]) << 24)
    }
    let expectedCRC = readLE32(trailerData, 0)
    let expectedSize = readLE32(trailerData, 4)

    // Seek past the gzip header to the start of the deflate stream. The
    // deflate data spans from headerSize to fileSize - 8; the 8-byte trailer
    // must not be fed to the decompressor.
    fileHandle.seek(toFileOffset: UInt64(headerSize))
    var compressedBytesRemaining = Int(fileSize) - headerSize - 8
    guard compressedBytesRemaining >= 0 else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    // Set up fixed-size scratch buffers for the decompression stream.
    let chunkSize = 65_536
    let sourceBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer {
        sourceBuffer.deallocate()
        destinationBuffer.deallocate()
    }

    let stream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
    defer { stream.deallocate() }

    // COMPRESSION_ZLIB decodes a raw deflate stream, which is exactly what
    // sits between the gzip header and trailer.
    var status = compression_stream_init(stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
    guard status != COMPRESSION_STATUS_ERROR else {
        throw ContainerizationError(.internalError, message: "gzip decompression failed")
    }
    defer { compression_stream_destroy(stream) }

    // Start with an empty source; it is refilled from the file below.
    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
    stream.pointee.src_size = 0
    stream.pointee.dst_ptr = destinationBuffer
    stream.pointee.dst_size = chunkSize

    var hasher = SHA256()
    var runningCRC: uLong = crc32(0, nil, 0)
    var totalDecompressedSize: UInt64 = 0
    var inputExhausted = false

    while status != COMPRESSION_STATUS_END {
        // Refill the source buffer when it is exhausted and more data is available.
        if stream.pointee.src_size == 0 && !inputExhausted {
            let toRead = min(chunkSize, compressedBytesRemaining)
            let chunk = toRead > 0 ? fileHandle.readData(ofLength: toRead) : Data()
            if !chunk.isEmpty {
                compressedBytesRemaining -= chunk.count
                chunk.copyBytes(to: sourceBuffer, count: chunk.count)
                stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
                stream.pointee.src_size = chunk.count
            } else {
                inputExhausted = true
            }
        }

        // Reset the destination window for this round of output.
        stream.pointee.dst_ptr = destinationBuffer
        stream.pointee.dst_size = chunkSize

        let flags: Int32 = inputExhausted ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
        status = compression_stream_process(stream, flags)

        switch status {
        case COMPRESSION_STATUS_OK, COMPRESSION_STATUS_END:
            let produced = chunkSize - stream.pointee.dst_size
            if produced > 0 {
                let buf = UnsafeBufferPointer(start: destinationBuffer, count: produced)
                hasher.update(bufferPointer: UnsafeRawBufferPointer(buf))
                runningCRC = crc32(runningCRC, destinationBuffer, uInt(produced))
                totalDecompressedSize += UInt64(produced)
            } else if status == COMPRESSION_STATUS_OK, inputExhausted, stream.pointee.src_size == 0 {
                // No input left, no output produced, and the stream did not
                // finish: the deflate data is truncated or malformed. Bail
                // out rather than spinning in this loop forever.
                throw ContainerizationError(.internalError, message: "gzip decompression failed")
            }
        default:
            throw ContainerizationError(.internalError, message: "gzip decompression failed")
        }
    }

    // Validate the trailer: CRC32 of the uncompressed bytes, and ISIZE
    // (the uncompressed length modulo 2^32) must both match.
    let actualCRC = UInt32(truncatingIfNeeded: runningCRC)
    let actualSize = UInt32(truncatingIfNeeded: totalDecompressedSize)
    guard expectedCRC == actualCRC, expectedSize == actualSize else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }

    return hasher.finalize()
}
|
|
||
/// Reads up to `count` bytes from a FileHandle, accumulating across short
/// reads, and returns nil if no data could be read at all.
///
/// `FileHandle.readData(ofLength:)` may legally return fewer bytes than
/// requested before end-of-file, so this loops until either `count` bytes
/// have been collected or the handle reports EOF. Callers that require a
/// full read must still verify `data.count` themselves (the trailer read
/// in `diffID` does exactly that).
private static func readExactly(fileHandle: FileHandle, count: Int) -> Data? {
    var data = Data()
    data.reserveCapacity(count)
    while data.count < count {
        let chunk = fileHandle.readData(ofLength: count - data.count)
        if chunk.isEmpty { break }  // EOF reached.
        data.append(chunk)
    }
    return data.isEmpty ? nil : data
}
|
|
||
/// Parses the gzip header to determine where the raw deflate stream begins.
///
/// Implements the RFC 1952 member-header layout: a fixed 10-byte prefix
/// (magic bytes, CM, FLG, MTIME, XFL, OS) followed by the optional FEXTRA,
/// FNAME, FCOMMENT, and FHCRC fields selected by bits in FLG.
///
/// - Parameter data: A prefix of the gzip file large enough to contain the
///   entire header (the caller reads the first 512 bytes).
/// - Returns: The byte offset of the first deflate byte.
/// - Throws: `ContainerizationError` if the magic bytes, compression
///   method, or reserved flag bits are invalid, or the header does not fit
///   within `data`.
private static func gzipHeaderSize(_ data: Data) throws -> Int {
    guard data.count >= 10,
        data[data.startIndex] == 0x1f,
        data[data.startIndex + 1] == 0x8b,
        data[data.startIndex + 2] == 0x08  // CM must be 8 (deflate) per RFC 1952
    else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    let start = data.startIndex
    let flags = data[start + 3]

    // RFC 1952 section 2.3.1: FLG bits 5-7 are reserved and must be zero;
    // a compliant decoder rejects members that set them.
    guard flags & 0xe0 == 0 else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    var offset = 10

    // FEXTRA: two-byte little-endian length followed by that many bytes.
    if flags & 0x04 != 0 {
        guard data.count >= offset + 2 else { throw ContainerizationError(.internalError, message: "invalid gzip file") }
        let extraLen = Int(data[start + offset]) | (Int(data[start + offset + 1]) << 8)
        offset += 2 + extraLen
    }
    // FNAME: zero-terminated original file name.
    if flags & 0x08 != 0 {
        while offset < data.count && data[start + offset] != 0 { offset += 1 }
        offset += 1
    }
    // FCOMMENT: zero-terminated comment.
    if flags & 0x10 != 0 {
        while offset < data.count && data[start + offset] != 0 { offset += 1 }
        offset += 1
    }
    // FHCRC: two-byte header CRC16 (its value is not verified here).
    if flags & 0x02 != 0 { offset += 2 }

    // The header may legitimately end exactly at the edge of the buffer, so
    // offset == data.count is acceptable; anything beyond means the header
    // did not fit (or a zero terminator was never found).
    guard offset <= data.count else { throw ContainerizationError(.internalError, message: "invalid gzip file") }
    return offset
}
|
|
||
| /// Encodes the passed in type as a JSON blob and writes it to the base path. | ||
| /// - Parameters: | ||
| /// - content: The type to convert to JSON. | ||
|
|
@@ -135,3 +307,4 @@ public class ContentWriter { | |
| return try self.write(data) | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,160 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // Copyright © 2025-2026 Apple Inc. and the Containerization project authors. | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // https://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| import ContainerizationError | ||
| import Crypto | ||
| import Foundation | ||
| import Testing | ||
|
|
||
| @testable import ContainerizationOCI | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question: I think we may need a test to validate gzip trailer to ensure it does not return a digest for malformed data.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good shout — the implementation wasn't validating the gzip trailer at all. I've added CRC32 + ISIZE verification after decompression (throws gzipTrailerMismatch on failure) and two new tests: one for a truncated archive with the trailer chopped off, and one for a corrupted CRC32 field. |
||
|
|
||
struct DiffIDTests {
    /// Helper to create a gzip-compressed temporary file from raw data.
    ///
    /// Shells out to `/usr/bin/gzip` so the fixture is produced by an
    /// independent implementation rather than the code under test.
    /// - Throws: `ContainerizationError` if the gzip subprocess fails or
    ///   does not produce an output file.
    private func createGzipFile(content: Data) throws -> URL {
        let tempDir = FileManager.default.temporaryDirectory
        let rawFile = tempDir.appendingPathComponent(UUID().uuidString)
        let gzFile = tempDir.appendingPathComponent(UUID().uuidString + ".gz")
        try content.write(to: rawFile)
        defer { try? FileManager.default.removeItem(at: rawFile) }

        let process = Process()
        process.executableURL = URL(fileURLWithPath: "/usr/bin/gzip")
        process.arguments = ["-k", "-f", rawFile.path]
        try process.run()
        process.waitUntilExit()

        // Fail loudly if compression did not succeed; returning a URL to a
        // nonexistent file would make downstream test failures misleading.
        guard process.terminationStatus == 0 else {
            throw ContainerizationError(.internalError, message: "gzip exited with status \(process.terminationStatus)")
        }
        let gzPath = URL(fileURLWithPath: rawFile.path + ".gz")
        guard FileManager.default.fileExists(atPath: gzPath.path) else {
            throw ContainerizationError(.internalError, message: "gzip did not produce an output file")
        }
        try FileManager.default.moveItem(at: gzPath, to: gzFile)
        return gzFile
    }

    @Test func diffIDMatchesUncompressedSHA256() throws {
        let content = Data("hello, oci layer content for diffid test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let expected = SHA256.hash(data: content)

        #expect(diffID.digestString == expected.digestString)
    }

    @Test func diffIDIsDeterministic() throws {
        let content = Data("deterministic diffid check".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let first = try ContentWriter.diffID(of: gzFile)
        let second = try ContentWriter.diffID(of: gzFile)

        #expect(first.digestString == second.digestString)
    }

    @Test func diffIDRejectsNonGzipData() throws {
        let tempFile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
        try Data("this is not gzip".utf8).write(to: tempFile)
        defer { try? FileManager.default.removeItem(at: tempFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: tempFile)
        }
    }

    @Test func diffIDRejectsEmptyFile() throws {
        let tempFile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
        try Data().write(to: tempFile)
        defer { try? FileManager.default.removeItem(at: tempFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: tempFile)
        }
    }

    @Test func diffIDHandlesLargeContent() throws {
        // 1MB of repeating data exercises the streaming (multi-chunk) path.
        let pattern = Data("ABCDEFGHIJKLMNOPQRSTUVWXYZ012345".utf8)
        var large = Data()
        large.reserveCapacity(1_048_576)
        for _ in 0..<(1_048_576 / pattern.count) {
            large.append(pattern)
        }
        let gzFile = try createGzipFile(content: large)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let expected = SHA256.hash(data: large)

        #expect(diffID.digestString == expected.digestString)
    }

    @Test func diffIDRejectsTruncatedGzip() throws {
        // Build a valid gzip file, then chop off the 8-byte trailer (CRC32 +
        // ISIZE) to produce a structurally malformed archive.
        let content = Data("truncated gzip trailer test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        var gzData = try Data(contentsOf: gzFile)
        guard gzData.count > 8 else {
            Issue.record("Compressed file too small to truncate")
            return
        }
        gzData.removeLast(8)

        let truncatedFile = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString + ".gz")
        try gzData.write(to: truncatedFile)
        defer { try? FileManager.default.removeItem(at: truncatedFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: truncatedFile)
        }
    }

    @Test func diffIDRejectsCorruptedCRC() throws {
        // Flip a byte in the CRC32 field of an otherwise valid gzip file.
        let content = Data("corrupted crc test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        var gzData = try Data(contentsOf: gzFile)
        // Guard the index computation: an unexpectedly tiny file must not
        // crash the test with an out-of-bounds subscript.
        guard gzData.count >= 8 else {
            Issue.record("Compressed file too small to contain a gzip trailer")
            return
        }
        let crcOffset = gzData.count - 8
        gzData[crcOffset] ^= 0xFF

        let corruptedFile = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString + ".gz")
        try gzData.write(to: corruptedFile)
        defer { try? FileManager.default.removeItem(at: corruptedFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: corruptedFile)
        }
    }

    @Test func diffIDDigestStringFormat() throws {
        let content = Data("format test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let digestString = diffID.digestString

        #expect(digestString.hasPrefix("sha256:"))
        // "sha256:" prefix (7 chars) + 64 hex chars.
        #expect(digestString.count == 7 + 64)
    }
}
Uh oh!
There was an error while loading. Please reload this page.