Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "hatchling.build"

[project]
name = "socketsecurity"
version = "2.2.51"
version = "2.2.54"
requires-python = ">= 3.10"
license = {"file" = "LICENSE"}
dependencies = [
Expand Down
2 changes: 1 addition & 1 deletion socketsecurity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__author__ = 'socket.dev'
__version__ = '2.2.51'
__version__ = '2.2.54'
USER_AGENT = f'SocketPythonCLI/{__version__}'
15 changes: 15 additions & 0 deletions socketsecurity/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class CliConfig:
reach_additional_params: Optional[List[str]] = None
only_facts_file: bool = False
reach_use_only_pregenerated_sboms: bool = False
max_purl_batch_size: int = 5000

@classmethod
def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
Expand Down Expand Up @@ -106,6 +107,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
'commit_sha': args.commit_sha,
'generate_license': args.generate_license,
'enable_debug': args.enable_debug,
'enable_diff': args.enable_diff,
'allow_unverified': args.allow_unverified,
'enable_json': args.enable_json,
'enable_sarif': args.enable_sarif,
Expand Down Expand Up @@ -141,6 +143,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
'reach_additional_params': args.reach_additional_params,
'only_facts_file': args.only_facts_file,
'reach_use_only_pregenerated_sboms': args.reach_use_only_pregenerated_sboms,
'max_purl_batch_size': args.max_purl_batch_size,
'version': __version__
}
try:
Expand Down Expand Up @@ -187,6 +190,11 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
logging.error("--reach-concurrency must be >= 1")
exit(1)

# Validate max_purl_batch_size is within allowed range
if args.max_purl_batch_size < 1 or args.max_purl_batch_size > 9999:
logging.error("--max-purl-batch-size must be between 1 and 9999")
exit(1)

return cls(**config_args)

def to_dict(self) -> dict:
Expand Down Expand Up @@ -446,6 +454,13 @@ def create_argument_parser() -> argparse.ArgumentParser:
action="store_true",
help="Exclude license details from the diff report (boosts performance for large repos)"
)
output_group.add_argument(
"--max-purl-batch-size",
dest="max_purl_batch_size",
type=int,
default=5000,
help="Maximum batch size for PURL endpoint calls when generating license info (default: 5000, min: 1, max: 9999)"
)

output_group.add_argument(
"--disable-security-issue",
Expand Down
126 changes: 54 additions & 72 deletions socketsecurity/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,54 +659,6 @@ def create_full_scan_with_report_url(
# Return result in the format expected by the user
return diff

def check_full_scans_status(self, head_full_scan_id: str, new_full_scan_id: str) -> bool:
    """Poll two full scans until both report the ``resolve`` state or time runs out.

    A ``fullscans.stream`` request is first issued for each scan with a short
    (0.5) SDK timeout; any exception from that request is logged at debug
    level and otherwise ignored. The SDK timeout is then put back to
    ``self.config.timeout`` and the metadata of each scan is polled until
    its ``scan_state`` equals ``"resolve"``.

    Args:
        head_full_scan_id: ID of the head (baseline) full scan.
        new_full_scan_id: ID of the newly created full scan.

    Returns:
        True if both scans reached ``"resolve"`` before ``self.config.timeout``
        seconds elapsed, False otherwise.
    """
    org_slug = self.config.org_slug
    configured_timeout = self.config.timeout

    # NOTE(review): the stream call presumably nudges the backend to start
    # processing the scan; its result is not used here - confirm with the SDK.
    self.sdk.set_timeout(0.5)
    try:
        for scan_id in (head_full_scan_id, new_full_scan_id):
            try:
                self.sdk.fullscans.stream(org_slug, scan_id)
            except Exception:
                log.debug(f"Queued up full scan for processing ({scan_id})")
    finally:
        # Always restore the configured timeout, even if something unexpected
        # (e.g. KeyboardInterrupt) escapes the loop above.
        self.sdk.set_timeout(configured_timeout)

    # Scans that have not yet been seen in the "resolve" state, head first.
    # dict.fromkeys keeps order and avoids polling the same ID twice.
    pending = list(dict.fromkeys((head_full_scan_id, new_full_scan_id)))
    is_ready = False
    start_check = time.time()
    while True:
        still_pending = []
        for scan_id in pending:
            metadata = self.sdk.fullscans.metadata(org_slug, scan_id)
            state = metadata.get("scan_state") if metadata else None
            if state != "resolve":
                still_pending.append(scan_id)
        # A scan that has resolved once is considered ready for good and is
        # not polled again.
        pending = still_pending

        if not pending:
            is_ready = True
            break
        if time.time() - start_check >= self.config.timeout:
            log.debug(
                f"Timeout reached while waiting for full scans to be ready "
                f"({head_full_scan_id}, {new_full_scan_id})"
            )
            break

    total_time = time.time() - start_check
    if is_ready:
        log.info(f"Full scans are ready in {total_time:.2f} seconds")
    else:
        log.warning(f"Full scans are not ready yet ({head_full_scan_id}, {new_full_scan_id})")
    return is_ready

def get_full_scan(self, full_scan_id: str) -> FullScan:
"""
Get a FullScan object for an existing full scan including sbom_artifacts and packages.
Expand Down Expand Up @@ -846,28 +798,54 @@ def update_package_values(pkg: Package) -> Package:
pkg.url += f"/{pkg.name}/overview/{pkg.version}"
return pkg

def get_license_text_via_purl(self, packages: dict[str, Package], batch_size: int = 5000) -> dict:
    """Get license attribution and details via the PURL endpoint in batches.

    Every key of ``packages`` is sent to ``self.sdk.purl.post`` as
    ``pkg:/<key>``. Each result is matched back to ``packages`` through the
    key ``"<type>/<name>@<version>"``; results with no matching key are
    ignored. If several results map to the same key, the last one wins.

    Args:
        packages: Dictionary of packages to get license info for.
        batch_size: Maximum number of packages per API call. Values outside
            1-9999 are clamped into that range.

    Returns:
        The same ``packages`` dictionary, with ``licenseAttrib`` and
        ``licenseDetails`` set on every package that had a matching result.
    """
    # Clamp rather than raise so direct callers cannot produce a zero step.
    batch_size = max(1, min(9999, batch_size))

    all_components = [{"purl": f"pkg:/{purl}"} for purl in packages]
    total_components = len(all_components)
    # Ceiling division; computed once because it does not change per batch.
    total_batches = (total_components + batch_size - 1) // batch_size
    log.debug(f"Processing {total_components} packages in batches of {batch_size}")

    batch_starts = range(0, total_components, batch_size)
    for batch_num, start in enumerate(batch_starts, start=1):
        batch_components = all_components[start:start + batch_size]
        log.debug(f"Processing batch {batch_num}/{total_batches} ({len(batch_components)} packages)")

        results = self.sdk.purl.post(
            license=True,
            components=batch_components,
            licenseattrib=True,
            licensedetails=True
        )

        for result in results:
            purl = f"{result['type']}/{result['name']}@{result['version']}"
            if purl in packages:
                packages[purl].licenseAttrib = result.get("licenseAttrib")
                packages[purl].licenseDetails = result.get("licenseDetails")

    return packages

def get_added_and_removed_packages(
Expand Down Expand Up @@ -960,7 +938,14 @@ def get_added_and_removed_packages(
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
log.error("No matching packages found in head_full_scan")

packages = self.get_license_text_via_purl(packages)
# Only fetch license details if generate_license is enabled
if self.cli_config and self.cli_config.generate_license:
log.debug("Fetching license details via PURL endpoint")
batch_size = self.cli_config.max_purl_batch_size if self.cli_config else 5000
packages = self.get_license_text_via_purl(packages, batch_size=batch_size)
else:
log.debug("Skipping PURL endpoint call (--generate-license not set)")

return added_packages, removed_packages, packages

def create_new_diff(
Expand Down Expand Up @@ -1092,9 +1077,6 @@ def create_new_diff(
log.warning(f"Failed to clean up temporary file {temp_file}: {e}")

# Handle diff generation - now we always have both scans
scans_ready = self.check_full_scans_status(head_full_scan_id, new_full_scan.id)
if scans_ready is False:
log.error(f"Full scans did not complete within {self.config.timeout} seconds")
(
added_packages,
removed_packages,
Expand Down
41 changes: 27 additions & 14 deletions socketsecurity/socketcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def main_code():
# Determine files to check based on the new logic
files_to_check = []
force_api_mode = False
force_diff_mode = False

if files_explicitly_specified:
# Case 2: Files are specified - use them and don't check commit details
Expand All @@ -365,10 +366,21 @@ def main_code():
# Case 1: Files not specified and --ignore-commit-files not set - try to find changed files from commit
files_to_check = git_repo.changed_files
log.debug(f"Using changed files from commit: {files_to_check}")
elif config.ignore_commit_files and is_repo:
# Case 3: Git repo with --ignore-commit-files - force diff mode
files_to_check = []
force_diff_mode = True
log.debug("Git repo with --ignore-commit-files: forcing diff mode")
else:
# ignore_commit_files is set or not a repo - scan everything but force API mode if no supported files
# Case 4: Not a git repo (ignore_commit_files was auto-set to True)
files_to_check = []
log.debug("No files to check from commit (ignore_commit_files=True or not a repo)")
# If --enable-diff is set, force diff mode for non-git repos
log.debug(f"Case 4: Non-git repo - config.enable_diff={config.enable_diff}, type={type(config.enable_diff)}")
if config.enable_diff:
force_diff_mode = True
log.debug("Non-git repo with --enable-diff: forcing diff mode")
else:
log.debug("Non-git repo without --enable-diff: will use full scan mode")

# Check if we have supported manifest files
has_supported_files = files_to_check and core.has_manifest_files(files_to_check)
Expand All @@ -389,22 +401,21 @@ def main_code():
has_supported_files = False

# If no supported manifest files were found (or the file list is empty), force API mode (no PR comments)
if not has_supported_files:
# BUT: Don't force API mode if we're in force_diff_mode
log.debug(f"files_to_check={files_to_check}, has_supported_files={has_supported_files}, force_diff_mode={force_diff_mode}, config.enable_diff={config.enable_diff}")
if not has_supported_files and not force_diff_mode:
force_api_mode = True
log.debug("No supported manifest files found, forcing API mode")
log.debug(f"force_api_mode={force_api_mode}")

# Determine scan behavior
should_skip_scan = False # Always perform scan, but behavior changes based on supported files
if config.ignore_commit_files and not files_explicitly_specified:
# Force full scan when ignoring commit files and no explicit files
should_skip_scan = False
log.debug("Forcing full scan due to ignore_commit_files")
elif not has_supported_files:
# No supported files - still scan but in API mode
if not has_supported_files and not force_diff_mode:
# No supported files and not forcing diff - still scan but in API mode
should_skip_scan = False
log.debug("No supported files but will scan in API mode")
else:
log.debug("Found supported manifest files, proceeding with normal scan")
log.debug("Found supported manifest files or forcing diff mode, proceeding with normal scan")

org_slug = core.config.org_slug
if config.repo_is_public:
Expand Down Expand Up @@ -457,6 +468,7 @@ def main_code():
diff.report_url = ""

# Handle SCM-specific flows
log.debug(f"Flow decision: scm={scm is not None}, force_diff_mode={force_diff_mode}, force_api_mode={force_api_mode}, enable_diff={config.enable_diff}")
if scm is not None and scm.check_event_type() == "comment":
# FIXME: This entire flow should be a separate command called "filter_ignored_alerts_in_comments"
# It's not related to scanning or diff generation - it just:
Expand Down Expand Up @@ -531,14 +543,15 @@ def main_code():

output_handler.handle_output(diff)

elif config.enable_diff and not force_api_mode:
# New logic: --enable-diff forces diff mode even with --integration api (no SCM)
elif (config.enable_diff or force_diff_mode) and not force_api_mode:
# New logic: --enable-diff or force_diff_mode (from --ignore-commit-files in git repos) forces diff mode
log.info("Diff mode enabled without SCM integration")
diff = core.create_new_diff(scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar, base_paths=base_paths, explicit_files=sbom_files_to_submit)
output_handler.handle_output(diff)

elif config.enable_diff and force_api_mode:
# User requested diff mode but no manifest files were detected
elif (config.enable_diff or force_diff_mode) and force_api_mode:
# User requested diff mode but no manifest files were detected - this should not happen with new logic
# but keeping as a safety net
log.warning("--enable-diff was specified but no supported manifest files were detected in the changed files. Falling back to full scan mode.")
log.info("Creating Socket Report (full scan)")
serializable_params = {
Expand Down
Loading