Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions Sources/Containerization/Image/InitImage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ extension InitImage {
var result = try writer.create(from: rootfs)
let layerDescriptor = Descriptor(mediaType: ContainerizationOCI.MediaTypes.imageLayerGzip, digest: result.digest.digestString, size: result.size)

// TODO: compute and fill in the correct diffID for the above layer
// We currently put in the sha of the fully compressed layer, this needs to be replaced with
// the sha of the uncompressed layer.
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [result.digest.digestString])
let diffID = try ContentWriter.diffID(of: rootfs)
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [diffID.digestString])
let runtimeConfig = ContainerizationOCI.ImageConfig(labels: labels)
let imageConfig = ContainerizationOCI.Image(architecture: platform.architecture, os: platform.os, config: runtimeConfig, rootfs: rootfsConfig)
result = try writer.create(from: imageConfig)
Expand Down
173 changes: 173 additions & 0 deletions Sources/ContainerizationOCI/Content/ContentWriter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
// limitations under the License.
//===----------------------------------------------------------------------===//

import Compression
import ContainerizationError
import Crypto
import Foundation
import NIOCore
import zlib

/// Provides a context to write data into a directory.
public class ContentWriter {
Expand Down Expand Up @@ -126,6 +128,176 @@ public class ContentWriter {
return (totalSize, digest)
}

/// Computes the SHA256 digest of the uncompressed content of a gzip file.
///
/// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
/// uncompressed layer content. This method streams the compressed file in
/// chunks, decompresses through Apple's Compression framework, and feeds
/// each decompressed chunk into an incremental SHA256 hasher. Neither the
/// full compressed nor the full decompressed data is held in memory.
///
/// - Parameter url: The URL of the gzip-compressed file.
/// - Returns: The SHA256 digest of the uncompressed content.
/// Computes the SHA256 digest of the uncompressed content of a gzip file.
///
/// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
/// uncompressed layer content. This method streams the compressed file in
/// chunks, decompresses through Apple's Compression framework, and feeds
/// each decompressed chunk into an incremental SHA256 hasher. Neither the
/// full compressed nor the full decompressed data is held in memory.
///
/// The gzip trailer (CRC32 + ISIZE, RFC 1952 §2.3) is validated after
/// decompression, so corrupted or truncated archives are rejected rather
/// than producing a digest of garbage.
///
/// NOTE(review): this assumes a single-member gzip file — the last 8 bytes
/// of the file are taken to be the trailer of the one and only member.
/// Multi-member archives (valid per RFC 1952) would fail the trailer check;
/// confirm single-member input is guaranteed by callers.
///
/// - Parameter url: The URL of the gzip-compressed file.
/// - Returns: The SHA256 digest of the uncompressed content.
/// - Throws: `ContainerizationError(.internalError)` when the file is not
///   valid gzip, the deflate stream cannot be decoded, or the trailer does
///   not match the decompressed content.
public static func diffID(of url: URL) throws -> SHA256.Digest {
    let fileHandle = try FileHandle(forReadingFrom: url)
    defer { fileHandle.closeFile() }

    // Read just enough to parse the gzip header (initial 512 bytes is plenty
    // for typical headers; see gzipHeaderSize for the probe-size caveat).
    let headerReadSize = 512
    guard let headerData = Self.readExactly(fileHandle: fileHandle, count: headerReadSize), !headerData.isEmpty else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }
    let headerSize = try Self.gzipHeaderSize(headerData)

    // Read the gzip trailer (last 8 bytes) to validate CRC32 + ISIZE later.
    // Seek to the end to get the file size, then read the trailer.
    fileHandle.seekToEndOfFile()
    let fileSize = fileHandle.offsetInFile
    guard fileSize >= 8 else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }
    fileHandle.seek(toFileOffset: fileSize - 8)
    guard let trailerData = Self.readExactly(fileHandle: fileHandle, count: 8),
        trailerData.count == 8 else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }
    // Both trailer fields are little-endian per RFC 1952.
    let expectedCRC = UInt32(trailerData[trailerData.startIndex])
        | (UInt32(trailerData[trailerData.startIndex + 1]) << 8)
        | (UInt32(trailerData[trailerData.startIndex + 2]) << 16)
        | (UInt32(trailerData[trailerData.startIndex + 3]) << 24)
    let expectedSize = UInt32(trailerData[trailerData.startIndex + 4])
        | (UInt32(trailerData[trailerData.startIndex + 5]) << 8)
        | (UInt32(trailerData[trailerData.startIndex + 6]) << 16)
        | (UInt32(trailerData[trailerData.startIndex + 7]) << 24)

    // Seek past the gzip header to the start of the deflate stream.
    // The deflate data spans from headerSize to fileSize - 8 (the last 8 bytes
    // are the gzip trailer: CRC32 + ISIZE). We must not feed the trailer to
    // the decompressor.
    fileHandle.seek(toFileOffset: UInt64(headerSize))
    var compressedBytesRemaining = Int(fileSize) - headerSize - 8
    guard compressedBytesRemaining >= 0 else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    // Set up the decompression stream. COMPRESSION_ZLIB in the Compression
    // framework decodes a raw deflate stream (no zlib wrapper), which is
    // exactly what follows a gzip header.
    let chunkSize = 65_536
    let sourceBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer {
        sourceBuffer.deallocate()
        destinationBuffer.deallocate()
    }

    let stream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
    defer { stream.deallocate() }

    var status = compression_stream_init(stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
    guard status != COMPRESSION_STATUS_ERROR else {
        throw ContainerizationError(.internalError, message: "gzip decompression failed")
    }
    defer { compression_stream_destroy(stream) }

    // Start with an empty source; we fill it from the file below.
    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
    stream.pointee.src_size = 0
    stream.pointee.dst_ptr = destinationBuffer
    stream.pointee.dst_size = chunkSize

    var hasher = SHA256()
    // Running CRC32 of the decompressed output, seeded with zlib's canonical
    // initial value; compared against the trailer's CRC32 at the end.
    var runningCRC: uLong = crc32(0, nil, 0)
    var totalDecompressedSize: UInt64 = 0
    var inputExhausted = false

    while status != COMPRESSION_STATUS_END {
        // Refill the source buffer when it is exhausted and more data is available.
        if stream.pointee.src_size == 0 && !inputExhausted {
            let toRead = min(chunkSize, compressedBytesRemaining)
            // A short read (chunk.count < toRead) is fine: src_size tracks the
            // actual byte count, and the remainder is picked up next iteration.
            if toRead > 0, let chunk = fileHandle.readData(ofLength: toRead) as Data?, !chunk.isEmpty {
                compressedBytesRemaining -= chunk.count
                chunk.copyBytes(to: sourceBuffer, count: chunk.count)
                stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
                stream.pointee.src_size = chunk.count
            } else {
                inputExhausted = true
            }
        }

        // Reset the destination window every pass; the previous pass's output
        // has already been hashed and CRC'd below.
        stream.pointee.dst_ptr = destinationBuffer
        stream.pointee.dst_size = chunkSize

        // Only finalize once all compressed input has been handed over, so the
        // decoder knows no further data is coming.
        let flags: Int32 = inputExhausted ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
        status = compression_stream_process(stream, flags)

        switch status {
        case COMPRESSION_STATUS_OK, COMPRESSION_STATUS_END:
            // Bytes produced this pass = destination capacity minus what's left.
            let produced = chunkSize - stream.pointee.dst_size
            if produced > 0 {
                let buf = UnsafeBufferPointer(start: destinationBuffer, count: produced)
                hasher.update(bufferPointer: UnsafeRawBufferPointer(buf))
                runningCRC = crc32(runningCRC, destinationBuffer, uInt(produced))
                totalDecompressedSize += UInt64(produced)
            }

        default:
            throw ContainerizationError(.internalError, message: "gzip decompression failed")
        }
    }

    // Validate the gzip trailer. ISIZE is the uncompressed size modulo 2^32
    // per RFC 1952, hence the truncating conversion of the running total.
    let actualCRC = UInt32(truncatingIfNeeded: runningCRC)
    let actualSize = UInt32(truncatingIfNeeded: totalDecompressedSize)

    guard expectedCRC == actualCRC, expectedSize == actualSize else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }

    return hasher.finalize()
}

/// Reads up to `count` bytes from the current offset of `fileHandle`.
///
/// NOTE: despite the name, this does not guarantee a full `count`-byte read —
/// a read near EOF returns however many bytes remain (the gzip-header probe
/// in `diffID` relies on this for files smaller than the probe size). Callers
/// that need an exact length must verify `count` on the result themselves.
///
/// - Returns: The bytes read, or `nil` when no bytes at all were available.
private static func readExactly(fileHandle: FileHandle, count: Int) -> Data? {
    let chunk = fileHandle.readData(ofLength: count)
    guard !chunk.isEmpty else { return nil }
    return chunk
}

/// Parses the gzip header to determine where the raw deflate stream begins.
private static func gzipHeaderSize(_ data: Data) throws -> Int {
guard data.count >= 10,
data[data.startIndex] == 0x1f,
data[data.startIndex + 1] == 0x8b,
data[data.startIndex + 2] == 0x08 // CM must be 8 (deflate) per RFC 1952
else {
throw ContainerizationError(.internalError, message: "invalid gzip file")
}

let start = data.startIndex
let flags = data[start + 3]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: what's the reason the current changes skipped compression method (CM) (ref https://datatracker.ietf.org/doc/html/rfc1952#page-5) entirely.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — the header parser was only checking the magic bytes (1f 8b) but not the compression method. I've added a guard for CM == 0x08 (deflate), which is the only method defined by RFC 1952. Anything else will now throw invalidGzip.

var offset = 10

// FEXTRA
if flags & 0x04 != 0 {
guard data.count >= offset + 2 else { throw ContainerizationError(.internalError, message: "invalid gzip file") }
let extraLen = Int(data[start + offset]) | (Int(data[start + offset + 1]) << 8)
offset += 2 + extraLen
}
// FNAME
if flags & 0x08 != 0 {
while offset < data.count && data[start + offset] != 0 { offset += 1 }
offset += 1
}
// FCOMMENT
if flags & 0x10 != 0 {
while offset < data.count && data[start + offset] != 0 { offset += 1 }
offset += 1
}
// FHCRC
if flags & 0x02 != 0 { offset += 2 }

guard offset < data.count else { throw ContainerizationError(.internalError, message: "invalid gzip file") }
return offset
}

/// Encodes the passed in type as a JSON blob and writes it to the base path.
/// - Parameters:
/// - content: The type to convert to JSON.
Expand All @@ -135,3 +307,4 @@ public class ContentWriter {
return try self.write(data)
}
}

160 changes: 160 additions & 0 deletions Tests/ContainerizationOCITests/DiffIDTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
//===----------------------------------------------------------------------===//
// Copyright © 2025-2026 Apple Inc. and the Containerization project authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//

import ContainerizationError
import Crypto
import Foundation
import Testing

@testable import ContainerizationOCI
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: I think we may need a test to validate gzip trailer to ensure it does not return a digest for malformed data.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good shout — the implementation wasn't validating the gzip trailer at all. I've added CRC32 + ISIZE verification after decompression (throwing a ContainerizationError with a "gzip trailer mismatch" message on failure) and two new tests: one for a truncated archive with the trailer chopped off, and one for a corrupted CRC32 field.


/// Tests for `ContentWriter.diffID(of:)` — the streaming gzip → SHA256
/// DiffID computation used for OCI layer configs.
struct DiffIDTests {
    /// Helper to create a gzip-compressed temporary file from raw data.
    ///
    /// Shells out to the system `gzip` binary so the fixture is produced by an
    /// independent implementation rather than the code under test.
    ///
    /// - Parameter content: The raw bytes to compress.
    /// - Returns: URL of a temporary `.gz` file; callers are responsible for
    ///   removing it.
    /// - Throws: If the file cannot be written, or gzip fails — failing loudly
    ///   here keeps fixture problems from masquerading as `diffID` bugs.
    private func createGzipFile(content: Data) throws -> URL {
        let tempDir = FileManager.default.temporaryDirectory
        let rawFile = tempDir.appendingPathComponent(UUID().uuidString)
        let gzFile = tempDir.appendingPathComponent(UUID().uuidString + ".gz")
        try content.write(to: rawFile)
        defer { try? FileManager.default.removeItem(at: rawFile) }

        let process = Process()
        process.executableURL = URL(fileURLWithPath: "/usr/bin/gzip")
        // -k keeps the input file, -f overwrites any stale output.
        process.arguments = ["-k", "-f", rawFile.path]
        try process.run()
        process.waitUntilExit()
        // Previously a gzip failure was silently ignored and a URL to a
        // nonexistent file was returned; surface it as a fixture error instead.
        guard process.terminationStatus == 0 else {
            throw ContainerizationError(.internalError, message: "gzip exited with status \(process.terminationStatus)")
        }

        let gzPath = URL(fileURLWithPath: rawFile.path + ".gz")
        guard FileManager.default.fileExists(atPath: gzPath.path) else {
            throw ContainerizationError(.internalError, message: "gzip did not produce output at \(gzPath.path)")
        }
        try FileManager.default.moveItem(at: gzPath, to: gzFile)
        return gzFile
    }

    /// The DiffID must equal the SHA256 of the *uncompressed* bytes.
    @Test func diffIDMatchesUncompressedSHA256() throws {
        let content = Data("hello, oci layer content for diffid test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let expected = SHA256.hash(data: content)

        #expect(diffID.digestString == expected.digestString)
    }

    /// Repeated computation over the same file yields the same digest.
    @Test func diffIDIsDeterministic() throws {
        let content = Data("deterministic diffid check".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let first = try ContentWriter.diffID(of: gzFile)
        let second = try ContentWriter.diffID(of: gzFile)

        #expect(first.digestString == second.digestString)
    }

    /// Files without a gzip magic header are rejected.
    @Test func diffIDRejectsNonGzipData() throws {
        let tempFile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
        try Data("this is not gzip".utf8).write(to: tempFile)
        defer { try? FileManager.default.removeItem(at: tempFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: tempFile)
        }
    }

    /// A zero-byte file is rejected (no header to parse).
    @Test func diffIDRejectsEmptyFile() throws {
        let tempFile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
        try Data().write(to: tempFile)
        defer { try? FileManager.default.removeItem(at: tempFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: tempFile)
        }
    }

    /// Exercises the chunked streaming path with content larger than the
    /// 64 KiB decompression window.
    @Test func diffIDHandlesLargeContent() throws {
        // 1MB of repeating data
        let pattern = Data("ABCDEFGHIJKLMNOPQRSTUVWXYZ012345".utf8)
        var large = Data()
        for _ in 0..<(1_048_576 / pattern.count) {
            large.append(pattern)
        }
        let gzFile = try createGzipFile(content: large)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let expected = SHA256.hash(data: large)

        #expect(diffID.digestString == expected.digestString)
    }

    /// A structurally malformed archive (trailer removed) must not yield a digest.
    @Test func diffIDRejectsTruncatedGzip() throws {
        // Build a valid gzip file, then chop off the 8-byte trailer (CRC32 + ISIZE)
        // to produce a structurally malformed archive.
        let content = Data("truncated gzip trailer test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        var gzData = try Data(contentsOf: gzFile)
        guard gzData.count > 8 else {
            Issue.record("Compressed file too small to truncate")
            return
        }
        gzData.removeLast(8)

        let truncatedFile = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString + ".gz")
        try gzData.write(to: truncatedFile)
        defer { try? FileManager.default.removeItem(at: truncatedFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: truncatedFile)
        }
    }

    /// Corrupting the stored CRC32 must cause a trailer-mismatch failure.
    @Test func diffIDRejectsCorruptedCRC() throws {
        // Flip a byte in the CRC32 field of an otherwise valid gzip file.
        let content = Data("corrupted crc test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        var gzData = try Data(contentsOf: gzFile)
        // Trailer layout: 4 bytes CRC32 then 4 bytes ISIZE, at the end of file.
        let crcOffset = gzData.count - 8
        gzData[crcOffset] ^= 0xFF

        let corruptedFile = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString + ".gz")
        try gzData.write(to: corruptedFile)
        defer { try? FileManager.default.removeItem(at: corruptedFile) }

        #expect(throws: ContainerizationError.self) {
            try ContentWriter.diffID(of: corruptedFile)
        }
    }

    /// The digest string follows the OCI `sha256:<64 hex chars>` format.
    @Test func diffIDDigestStringFormat() throws {
        let content = Data("format test".utf8)
        let gzFile = try createGzipFile(content: content)
        defer { try? FileManager.default.removeItem(at: gzFile) }

        let diffID = try ContentWriter.diffID(of: gzFile)
        let digestString = diffID.digestString

        #expect(digestString.hasPrefix("sha256:"))
        // sha256: prefix + 64 hex chars
        #expect(digestString.count == 7 + 64)
    }
}