Skip to content

Commit ad11073

Browse files
committed
Add migration for missing metadata artifacts
1 parent e134c20 commit ad11073

File tree

1 file changed

+190
-0
lines changed

1 file changed

+190
-0
lines changed
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
2+
3+
from django.db import migrations
4+
5+
# Number of pending rows accumulated before they are written with bulk_create;
# also passed as the ``batch_size`` argument of those bulk_create calls.
BATCH_SIZE = 1000
6+
7+
8+
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Build a hasher for ``name`` via ``hashlib.new``, forwarding any extra arguments.
    Returns None when ``name`` is not listed in ``settings.ALLOWED_CONTENT_CHECKSUMS``.
    """
    import hashlib
    from django.conf import settings

    if name in settings.ALLOWED_CONTENT_CHECKSUMS:
        return hashlib.new(name, *args, **kwargs)
    return None
19+
20+
21+
def init_and_validate(file, artifact_model, expected_digests):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Compute the size and digests of ``file``, check them against ``expected_digests``
    and build an (unsaved) ``artifact_model`` instance from the result.

    ``file`` is either a path (str), which is read and hashed here, or an object that
    already exposes ``size`` and ``hashers``.

    Returns None when no hasher is available, when an expected algorithm was not
    computed, or when a computed digest differs from the expected one.
    """
    from django.conf import settings

    digest_fields = [
        alg
        for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if not isinstance(file, str):
        # The object already carries its size and hashers.
        size = file.size
        hashers = file.hashers
    else:
        with open(file, "rb") as stream:
            hashers = {}
            for name in digest_fields:
                hasher = pulp_hashlib_new(name)
                if hasher is not None:
                    hashers[name] = hasher
            if not hashers:
                return None

            size = 0
            # Feed the file to every hasher in 1 megabyte chunks.
            while chunk := stream.read(1048576):
                for hasher in hashers.values():
                    hasher.update(chunk)
                size += len(chunk)

    for name, expected in expected_digests.items():
        if name not in hashers or hashers[name].hexdigest() != expected:
            return None

    attributes = {"size": size, "file": file}
    attributes.update({name: hashers[name].hexdigest() for name in digest_fields})
    return artifact_model(**attributes)
64+
65+
66+
def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.

    The wheel's own metadata lives in the ``*.dist-info`` directory at the archive
    root, so a root-level ``<name>.dist-info/METADATA`` entry is preferred. If the
    archive has no such entry, the first nested ``.dist-info/METADATA`` member is used
    as a fallback. Without this preference, a vendored package's METADATA that is
    listed earlier in the archive would be returned instead of the wheel's own.

    Returns the raw metadata content as bytes or None if metadata cannot be extracted
    (unreadable file, invalid zip archive, or no METADATA member).
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as wheel:
            nested_match = None
            for file_path in wheel.namelist():
                if not file_path.endswith(".dist-info/METADATA"):
                    continue
                if file_path.count("/") == 1:
                    # Root-level dist-info directory: this is the wheel's own metadata.
                    return wheel.read(file_path)
                if nested_match is None:
                    nested_match = file_path
            if nested_match is not None:
                return wheel.read(nested_match)
    except (zipfile.BadZipFile, KeyError, OSError):
        # Best effort: an unreadable or corrupt wheel simply yields no metadata.
        pass
    return None
81+
82+
83+
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Creates artifact for metadata from the provided wheel artifact.

    The content of ``artifact.file`` is copied into a temporary file inside ``tmp_dir``,
    the wheel METADATA is extracted from that copy and written to a second temporary
    file, which is then hashed and validated against ``md_digests``.

    Args:
        filename: File name of the wheel; used as the suffix of the temporary copy.
        artifact: Artifact of the wheel; its ``file`` is read from the beginning.
        md_digests: Mapping of digest algorithm name to the expected metadata digest.
        tmp_dir: Directory in which the temporary files are created.
        artifact_model: Model class used to instantiate the metadata artifact.

    Returns:
        The unsaved metadata artifact built by ``init_and_validate``, or None when the
        metadata cannot be extracted or does not pass validation.
    """
    import contextlib
    import os
    import shutil
    import tempfile

    temp_wheel_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=filename, delete=False
        ) as temp_file:
            temp_wheel_path = temp_file.name
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
        # Leaving the ``with`` block closed (and flushed) the copy, so it can be read.
        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        # The wheel copy is only needed for the extraction. Removing it right away
        # keeps the temporary directory from accumulating a copy of every wheel.
        if temp_wheel_path is not None:
            with contextlib.suppress(OSError):
                os.remove(temp_wheel_path)

    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)

    metadata_artifact = init_and_validate(temp_metadata_path, artifact_model, md_digests)
    if metadata_artifact is None:
        # No artifact references the metadata file, so it can be discarded.
        with contextlib.suppress(OSError):
            os.remove(temp_metadata_path)
    return metadata_artifact
109+
110+
111+
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    For every wheel package with a non-empty ``metadata_sha256`` the METADATA file is
    extracted from the package's main artifact, validated against that digest and stored
    as a new Artifact linked through a ContentArtifact whose relative path is
    ``<filename>.metadata``. Packages without a main artifact, or whose metadata cannot
    be extracted or validated, are skipped.

    Wheels with byte-identical METADATA share a single Artifact, so the same digest is
    never inserted twice.

    Args:
        apps: Historical app registry provided by the migration framework.
        schema_editor: Schema editor provided by the migration framework (unused).
    """
    import tempfile
    from django.conf import settings

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False, filename__endswith=".whl"
        )
        .exclude(metadata_sha256="")
        # Prefetch the artifacts as well, so ``ca.artifact`` below does not issue
        # one query per content artifact.
        .prefetch_related("contentartifact_set__artifact")
        .only("filename", "metadata_sha256")
    )
    # Unsaved metadata artifacts of the current batch, keyed by their sha256.
    pending_artifacts = {}
    contentartifact_batch = []

    def flush():
        """Write the pending batch; artifacts first, as the content artifacts use them."""
        Artifact.objects.bulk_create(list(pending_artifacts.values()), batch_size=BATCH_SIZE)
        ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
        pending_artifacts.clear()
        contentartifact_batch.clear()

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            filename = package.filename
            metadata_path = f"{filename}.metadata"
            content_artifacts = list(package.contentartifact_set.all())

            # ContentArtifact and Artifact for metadata are not expected to exist yet
            # because this migration is released together with the new functionality
            # which creates them. The check is cheap (the rows are already loaded).
            if any(ca.relative_path == metadata_path for ca in content_artifacts):
                continue

            # Get the main artifact for package
            main_artifact = None
            for ca in content_artifacts:
                if ca.relative_path == filename and ca.artifact:
                    main_artifact = ca.artifact
                    break

            if not main_artifact:
                # Main artifact does not exist
                continue

            digest = package.metadata_sha256
            metadata_artifact = artifact_to_metadata_artifact(
                filename, main_artifact, {"sha256": digest}, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Failed to build metadata artifact
                continue

            # The artifact was validated against ``digest``. Reuse an artifact with the
            # same digest if one is pending in this batch or is already stored, e.g.
            # from an earlier batch of this migration.
            # NOTE(review): the lookup is not scoped by domain - confirm whether
            # Artifact rows need per-domain handling in this migration.
            existing_artifact = pending_artifacts.get(digest)
            if existing_artifact is None:
                existing_artifact = Artifact.objects.filter(sha256=digest).first()
            if existing_artifact is None:
                pending_artifacts[digest] = metadata_artifact
            else:
                metadata_artifact = existing_artifact

            contentartifact_batch.append(
                ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=metadata_path,
                )
            )

            if len(contentartifact_batch) >= BATCH_SIZE:
                flush()

        # Write the remainder while the temporary metadata files still exist.
        if contentartifact_batch:
            flush()
177+
178+
179+
class Migration(migrations.Migration):
    """Data migration that runs create_missing_metadata_artifacts; reversing is a no-op."""

    dependencies = [("python", "0018_packageprovenance")]

    operations = [
        migrations.RunPython(
            code=create_missing_metadata_artifacts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

0 commit comments

Comments
 (0)