Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ black==22.3.0
bleach==6.1.0
boolean.py==4.0
certifi==2024.7.4
cffi==1.15.0
cffi==2.0.0
chardet==4.0.0
charset-normalizer==2.0.12
click==8.1.2
coreapi==2.3.3
coreschema==0.0.4
cryptography==44.0.1
cryptography==46.0.5
crispy-bootstrap4==2024.1
cwe2==3.0.0
dateparser==1.3.0
Expand Down Expand Up @@ -124,3 +124,5 @@ wcwidth==0.2.5
websocket-client==0.59.0
yarl==1.7.2
zipp==3.19.1
PyGithub==2.6.1
python-gitlab~=7.1.0
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ install_requires =

# networking
GitPython>=3.1.17
PyGithub>=2.6.1
python-gitlab>=7.1.0
requests>=2.25.1
fetchcode>=0.6.0

Expand Down
30 changes: 30 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from vulnerabilities.pipelines.v2_importers import apache_tomcat_importer as apache_tomcat_v2
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
from vulnerabilities.pipelines.v2_importers import collect_fix_commits as collect_fix_commits_v2
from vulnerabilities.pipelines.v2_importers import collect_issue_pr as collect_issue_pr_v2
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
from vulnerabilities.pipelines.v2_importers import debian_importer as debian_importer_v2
from vulnerabilities.pipelines.v2_importers import (
Expand Down Expand Up @@ -191,5 +192,34 @@
collect_fix_commits_v2.CollectGitFixCommitsPipeline,
collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline,
collect_fix_commits_v2.CollectGitlabFixCommitsPipeline,
collect_issue_pr_v2.CollectBusyBoxPRSIssuesPipeline,
collect_issue_pr_v2.CollectNginxPRSIssuesPipeline,
collect_issue_pr_v2.CollectApacheTomcatPRSIssuesPipeline,
collect_issue_pr_v2.CollectMongodbPRSIssuesPipeline,
collect_issue_pr_v2.CollectRedisPRSIssuesPipeline,
collect_issue_pr_v2.CollectPhpPRSIssuesPipeline,
collect_issue_pr_v2.CollectPythonCpythonPRSIssuesPipeline,
collect_issue_pr_v2.CollectRubyPRSIssuesPipeline,
collect_issue_pr_v2.CollectGoPRSIssuesPipeline,
collect_issue_pr_v2.CollectNodeJsPRSIssuesPipeline,
collect_issue_pr_v2.CollectRustPRSIssuesPipeline,
collect_issue_pr_v2.CollectOpenjdkPRSIssuesPipeline,
collect_issue_pr_v2.CollectSwiftPRSIssuesPipeline,
collect_issue_pr_v2.CollectDjangoPRSIssuesPipeline,
collect_issue_pr_v2.CollectLaravelPRSIssuesPipeline,
collect_issue_pr_v2.CollectSpringFrameworkPRSIssuesPipeline,
collect_issue_pr_v2.CollectRailsPRSIssuesPipeline,
collect_issue_pr_v2.CollectReactPRSIssuesPipeline,
collect_issue_pr_v2.CollectAngularPRSIssuesPipeline,
collect_issue_pr_v2.CollectDockerMobyPRSIssuesPipeline,
collect_issue_pr_v2.CollectKubernetesPRSIssuesPipeline,
collect_issue_pr_v2.CollectContainerdPRSIssuesPipeline,
collect_issue_pr_v2.CollectAnsiblePRSIssuesPipeline,
collect_issue_pr_v2.CollectTerraformPRSIssuesPipeline,
collect_issue_pr_v2.CollectTcpdumpPRSIssuesPipeline,
collect_issue_pr_v2.CollectJenkinsPRSIssuesPipeline,
collect_issue_pr_v2.CollectGitlabPRSIssuesPipeline,
collect_issue_pr_v2.CollectWiresharkPRSIssuesPipeline,
collect_issue_pr_v2.CollectQemuPRSIssuesPipeline,
]
)
108 changes: 108 additions & 0 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,36 @@
#

import logging
import re
import traceback
from abc import abstractmethod
from collections import defaultdict
from datetime import datetime
from datetime import timezone
from timeit import default_timer as timer
from traceback import format_exc as traceback_format_exc
from typing import Iterable
from typing import List
from urllib.parse import urlparse

import gitlab
from aboutcode.pipeline import LoopProgress
from aboutcode.pipeline import PipelineDefinition
from aboutcode.pipeline import humanize_time
from github import Github
from gitlab import GitlabAuthenticationError
from gitlab import GitlabSearchError

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AdvisoryDataV2
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.models import Advisory
from vulnerabilities.models import PipelineRun
from vulnerabilities.pipes.advisory import import_advisory
from vulnerabilities.pipes.advisory import insert_advisory
from vulnerabilities.pipes.advisory import insert_advisory_v2
from vulnerablecode.settings import env

module_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -334,3 +345,100 @@ def collect_and_store_advisories(self):
continue

self.log(f"Successfully collected {collected_advisory_count:,d} advisories")


class VCSCollector(VulnerableCodeBaseImporterPipelineV2):
    """
    Base pipeline that turns GitHub/GitLab issues and pull/merge requests
    mentioning a CVE into advisories.

    Subclasses set ``repo_url`` and implement ``fetch_entries`` and
    ``collect_items``. ``collect_items`` must store in ``self.collected_items``
    a mapping of vulnerability id to a list of ``(reference_type, url)`` tuples,
    which ``collect_advisories`` then converts to AdvisoryDataV2 objects.
    """

    # URL of the repository to scan; read by configure_target() and
    # collect_advisories().
    repo_url: str
    # NOTE(review): nothing in this class reads ``vcs_url``; the annotation is kept
    # only for backward compatibility. Confirm whether it can be dropped.
    vcs_url: str
    CVE_PATTERN = re.compile(r"(CVE-\d{4}-\d+)", re.IGNORECASE)
    SUPPORTED_IDENTIFIERS = ["CVE-"]

    # Class-level fallback shared by every subclass: never mutate it in place,
    # collect_items() implementations rebind it on the instance instead.
    collected_items: dict = {}

    def advisories_count(self) -> int:
        """Always return 0; no advisory count is computed up front."""
        return 0

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.configure_target,
            cls.fetch_entries,
            cls.collect_items,
            cls.collect_and_store_advisories,
        )

    def configure_target(self):
        """
        Derive ``self.repo_name`` ("<owner>/<repository>") from ``self.repo_url``.

        Raise ValueError when the URL path does not contain two non-empty
        leading segments.
        """
        parsed_url = urlparse(self.repo_url)
        parts = parsed_url.path.strip("/").split("/")
        # Empty segments (for example "owner//repo") cannot name a repository.
        if len(parts) < 2 or not parts[0] or not parts[1]:
            raise ValueError(f"Invalid URL: {self.repo_url}")

        owner, repository = parts[0], parts[1]
        # Accept clone-style URLs: "repo.git" designates the repository "repo".
        git_suffix = ".git"
        if repository.endswith(git_suffix) and len(repository) > len(git_suffix):
            repository = repository[: -len(git_suffix)]

        self.repo_name = f"{owner}/{repository}"

    @abstractmethod
    def fetch_entries(self):
        """Fetch the raw issues and pull/merge requests from the hosting service."""
        raise NotImplementedError

    @abstractmethod
    def collect_items(self):
        """Populate ``self.collected_items`` from the fetched entries."""
        raise NotImplementedError

    def collect_advisories(self):
        """
        Yield one AdvisoryDataV2 per collected vulnerability id, with one reference
        per distinct ``(reference_type, url)`` pair recorded for that id.
        """
        self.log("Generating AdvisoryData objects from GitHub/Gitlab issues and PRs.")
        for vuln_id, refs in self.collected_items.items():
            # dict.fromkeys() drops repeated pairs while keeping first-seen order, so
            # an entry mentioning the same CVE several times yields one reference.
            unique_refs = dict.fromkeys(refs)
            references = [
                ReferenceV2(reference_type=ref_type, url=url) for ref_type, url in unique_refs
            ]
            yield AdvisoryDataV2(
                advisory_id=vuln_id,
                aliases=[],
                references=references,
                url=self.repo_url,
            )


class GitHubCollector(VCSCollector):
    """Collect issues and pull requests mentioning a CVE from a GitHub repository."""

    def fetch_entries(self):
        """
        Search the issues and pull requests of ``self.repo_name`` on GitHub and keep
        the search results in ``self.issues`` and ``self.prs``.

        The API token is read from the ``GITHUB_TOKEN`` setting.
        """
        github_token = env.str("GITHUB_TOKEN")
        g = Github(login_or_token=github_token)
        base_query = f"repo:{self.repo_name} ({' OR '.join(self.SUPPORTED_IDENTIFIERS)})"
        self.issues = g.search_issues(f"{base_query} is:issue")
        self.prs = g.search_issues(f"{base_query} is:pr")

    def collect_items(self):
        """
        Group the fetched entries by the CVE ids found in their title and body.

        ``self.collected_items`` maps each upper-cased CVE id to a list of
        ``(reference_type, url)`` tuples where reference_type is "Issue" or "PR".
        """
        self.collected_items = defaultdict(list)

        for i_type, items in (("Issue", self.issues), ("PR", self.prs)):
            for item in items:
                text = (item.title or "") + " " + (item.body or "")
                # De-duplicate per entry: a CVE is often repeated in title and body.
                cve_ids = dict.fromkeys(
                    match.upper() for match in self.CVE_PATTERN.findall(text)
                )
                for cve_id in cve_ids:
                    # Record the real entry type so pull requests are not
                    # reported as issues.
                    self.collected_items[cve_id].append((i_type, item.html_url))


class GitLabCollector(VCSCollector):
    """Collect issues and merge requests mentioning a CVE from a gitlab.com project."""

    def fetch_entries(self):
        """
        Search the issues and merge requests of ``self.repo_name`` on gitlab.com and
        keep the search results in ``self.issues`` and ``self.prs``.

        The API token is read from the ``GITLAB_TOKEN`` setting.
        """
        gitlab_token = env.str("GITLAB_TOKEN")
        gl = gitlab.Gitlab("https://gitlab.com/", private_token=gitlab_token)
        project = gl.projects.get(self.repo_name)
        base_query = " ".join(self.SUPPORTED_IDENTIFIERS)
        # iterator=True makes python-gitlab follow pagination lazily; without it
        # only the first page of search results is returned.
        self.issues = project.search(scope="issues", search=base_query, iterator=True)
        self.prs = project.search(scope="merge_requests", search=base_query, iterator=True)

    def collect_items(self):
        """
        Group the fetched entries by the CVE ids found in their title and description.

        ``self.collected_items`` maps each upper-cased CVE id to a list of
        ``(reference_type, url)`` tuples where reference_type is "Issue" or "PR".
        """
        self.collected_items = defaultdict(list)
        for i_type, items in (("Issue", self.issues), ("PR", self.prs)):
            for item in items:
                title = item.get("title") or ""
                description = item.get("description") or ""
                url = item.get("web_url")
                # De-duplicate per entry: a CVE is often repeated in title and body.
                cve_ids = dict.fromkeys(
                    match.upper()
                    for match in self.CVE_PATTERN.findall(title + " " + description)
                )
                for cve_id in cve_ids:
                    self.collected_items[cve_id].append((i_type, url))
156 changes: 156 additions & 0 deletions vulnerabilities/pipelines/v2_importers/collect_issue_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.pipelines import GitHubCollector
from vulnerabilities.pipelines import GitLabCollector


class CollectBusyBoxPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the mirror/busybox GitHub repository."""

    repo_url = "https://github.com/mirror/busybox"
    pipeline_id = "collect-busybox-prs-issues"


class CollectNginxPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the nginx/nginx GitHub repository."""

    repo_url = "https://github.com/nginx/nginx"
    pipeline_id = "collect-nginx-prs-issues"


class CollectApacheTomcatPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the apache/tomcat GitHub repository."""

    repo_url = "https://github.com/apache/tomcat"
    pipeline_id = "collect-apache-tomcat-prs-issues"


class CollectMongodbPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the mongodb/mongo GitHub repository."""

    repo_url = "https://github.com/mongodb/mongo"
    pipeline_id = "collect-mongodb-prs-issues"


class CollectRedisPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the redis/redis GitHub repository."""

    repo_url = "https://github.com/redis/redis"
    pipeline_id = "collect-redis-prs-issues"


class CollectPhpPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the php/php-src GitHub repository."""

    repo_url = "https://github.com/php/php-src"
    pipeline_id = "collect-php-prs-issues"


class CollectPythonCpythonPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the python/cpython GitHub repository."""

    repo_url = "https://github.com/python/cpython"
    pipeline_id = "collect-python-cpython-prs-issues"


class CollectRubyPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the ruby/ruby GitHub repository."""

    repo_url = "https://github.com/ruby/ruby"
    pipeline_id = "collect-ruby-prs-issues"


class CollectGoPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the golang/go GitHub repository."""

    repo_url = "https://github.com/golang/go"
    pipeline_id = "collect-go-prs-issues"


class CollectNodeJsPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the nodejs/node GitHub repository."""

    repo_url = "https://github.com/nodejs/node"
    pipeline_id = "collect-node-js-prs-issues"


class CollectRustPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the rust-lang/rust GitHub repository."""

    repo_url = "https://github.com/rust-lang/rust"
    pipeline_id = "collect-rust-prs-issues"


class CollectOpenjdkPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the openjdk/jdk GitHub repository."""

    repo_url = "https://github.com/openjdk/jdk"
    pipeline_id = "collect-openjdk-prs-issues"


class CollectSwiftPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the swiftlang/swift GitHub repository."""

    repo_url = "https://github.com/swiftlang/swift"
    pipeline_id = "collect-swift-prs-issues"


class CollectDjangoPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the django/django GitHub repository."""

    repo_url = "https://github.com/django/django"
    pipeline_id = "collect-django-prs-issues"


class CollectRailsPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the rails/rails GitHub repository."""

    repo_url = "https://github.com/rails/rails"
    pipeline_id = "collect-rails-prs-issues"


class CollectLaravelPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the laravel/framework GitHub repository."""

    repo_url = "https://github.com/laravel/framework"
    pipeline_id = "collect-laravel-prs-issues"


class CollectSpringFrameworkPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from spring-projects/spring-framework on GitHub."""

    repo_url = "https://github.com/spring-projects/spring-framework"
    pipeline_id = "collect-spring-framework-prs-issues"


class CollectReactPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the facebook/react GitHub repository."""

    repo_url = "https://github.com/facebook/react"
    pipeline_id = "collect-react-prs-issues"


class CollectAngularPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the angular/angular GitHub repository."""

    repo_url = "https://github.com/angular/angular"
    pipeline_id = "collect-angular-prs-issues"


class CollectDockerMobyPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the moby/moby GitHub repository."""

    repo_url = "https://github.com/moby/moby"
    pipeline_id = "collect-docker-moby-prs-issues"


class CollectKubernetesPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the kubernetes/kubernetes GitHub repository."""

    repo_url = "https://github.com/kubernetes/kubernetes"
    pipeline_id = "collect-kubernetes-prs-issues"


class CollectContainerdPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the containerd/containerd GitHub repository."""

    repo_url = "https://github.com/containerd/containerd"
    pipeline_id = "collect-containerd-prs-issues"


class CollectAnsiblePRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the ansible/ansible GitHub repository."""

    repo_url = "https://github.com/ansible/ansible"
    pipeline_id = "collect-ansible-prs-issues"


class CollectTerraformPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the hashicorp/terraform GitHub repository."""

    repo_url = "https://github.com/hashicorp/terraform"
    pipeline_id = "collect-terraform-prs-issues"


class CollectTcpdumpPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the-tcpdump-group/tcpdump on GitHub."""

    repo_url = "https://github.com/the-tcpdump-group/tcpdump"
    pipeline_id = "collect-tcpdump-prs-issues"


class CollectJenkinsPRSIssuesPipeline(GitHubCollector):
    """Collect CVE-mentioning issues and PRs from the jenkinsci/jenkins GitHub repository."""

    # Hyphen-separated like every other "collect-<project>-prs-issues" pipeline id
    # (the id previously contained a stray underscore).
    pipeline_id = "collect-jenkins-prs-issues"
    repo_url = "https://github.com/jenkinsci/jenkins"


class CollectGitlabPRSIssuesPipeline(GitLabCollector):
    """Collect CVE-mentioning issues and merge requests from gitlab-org/gitlab-foss on GitLab."""

    repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"
    pipeline_id = "collect-gitlab-prs-issues"


class CollectWiresharkPRSIssuesPipeline(GitLabCollector):
    """Collect CVE-mentioning issues and merge requests from wireshark/wireshark on GitLab."""

    repo_url = "https://gitlab.com/wireshark/wireshark"
    pipeline_id = "collect-wireshark-prs-issues"


class CollectQemuPRSIssuesPipeline(GitLabCollector):
    """Collect CVE-mentioning issues and merge requests from qemu-project/qemu on GitLab."""

    repo_url = "https://gitlab.com/qemu-project/qemu"
    pipeline_id = "collect-qemu-prs-issues"
Loading
Loading