Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI workflow: run the full pytest suite (unit, integration, e2e) on every
# pull request, on pushes to main, and on manual dispatch.
name: Tests

on:
  pull_request:
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  test-suite:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Pin the interpreter to the version the project requires
      # (pyproject.toml declares requires-python = ">=3.12").
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install deps
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Markers come from [tool.pytest.ini_options] in pyproject.toml;
      # coverage.xml is produced there via the --cov-report=xml addopt.
      - name: Run unit, integration, and e2e tests
        run: pytest -m "unit or integration or e2e" --junitxml=pytest-all.xml

      # `if: always()` ensures the JUnit and coverage reports are uploaded
      # even when the test step fails — failing runs are precisely the ones
      # whose reports we need to inspect.
      - name: Upload test + coverage artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-suite-reports
          path: |
            pytest-all.xml
            coverage.xml
26 changes: 24 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
# Python cache and local environments
__pycache__/
.ipynb_checkpoints
.ipynb_checkpoints/
.env
.envrc
.venv
env/
venv/
ENV/

# Editor settings
.vscode/

# Local build and smoke-test output
smoketest/
.build_pyz/
build/
dist/
pip-wheel-metadata/

# Test runner and coverage artifacts
.pytest_cache/
.pytest_tmp*/
.coverage
.coverage.*
coverage.xml
htmlcov/
pytest-*.xml

# Project-generated local data/artifacts
*.egg-info/
.eggs/
*.fasta
*.csv
*.xlsx
Expand All @@ -18,4 +39,5 @@ smoketest/
*.txt
*.pyz
*.png
*.metadata
*.metadata
*.json
65 changes: 65 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Packaging, test-runner, and coverage configuration for pepseqpred.

[build-system]
requires = ["setuptools>=69", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pepseqpred"
version = "1.0.0rc1"
description = "Residue-level epitope prediction pipeline for peptide/protein workflows."
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "numpy>=2.3,<3",
    "pandas>=2.3,<3",
    "torch>=2.4,<3",
    "fair-esm==2.0.0",
    "scikit-learn>=1.5,<2",
    "optuna>=3.5,<5"
]

# Development-only tooling; install with `pip install .[dev]`.
[project.optional-dependencies]
dev = [
    "pytest>=8.0",
    "pytest-cov>=5.0",
    "pytest-mock>=3.14",
    "ruff>=0.6"
]

[tool.pytest.ini_options]
minversion = "8.0"
# Coverage is collected on every pytest run; the suite fails below 75%.
addopts = "-ra --strict-markers --cov=pepseqpred --cov-report=term-missing --cov-report=xml --cov-fail-under=75"
testpaths = ["tests"]
# src-layout: make the package importable in tests without installing it.
pythonpath = ["src"]
# --strict-markers (above) requires every marker used in tests to be listed here.
markers = [
    "unit: fast isolated tests",
    "integration: component interaction tests",
    "e2e: end-to-end pipeline tests",
    "slow: longer-running tests"
]

[tool.coverage.run]
branch = true
source = ["pepseqpred"]
omit = [
    "tests/*"
]

[tool.coverage.report]
show_missing = true
skip_empty = true
precision = 2

# Console entry points, one per CLI app under src/pepseqpred/apps/.
[project.scripts]
pepseqpred-esm = "pepseqpred.apps.esm_cli:main"
pepseqpred-labels = "pepseqpred.apps.labels_cli:main"
pepseqpred-predict = "pepseqpred.apps.prediction_cli:main"
pepseqpred-preprocess = "pepseqpred.apps.preprocess_cli:main"
pepseqpred-train-ffnn = "pepseqpred.apps.train_ffnn_cli:main"
pepseqpred-train-ffnn-optuna = "pepseqpred.apps.train_ffnn_optuna_cli:main"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
include = ["pepseqpred*"]
Binary file modified requirements.txt
Binary file not shown.
3 changes: 2 additions & 1 deletion src/pepseqpred/apps/prediction_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ def main() -> None:
layer = esm_model.num_layers

# load model from disk
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(
args.checkpoint, map_location="cpu", weights_only=True)
cli_model_cfg = _build_cli_model_config(args)
psp_model, model_cfg, model_cfg_src = build_model_from_checkpoint(
checkpoint,
Expand Down
2 changes: 1 addition & 1 deletion src/pepseqpred/apps/preprocess_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def main() -> None:
help="Prefix for subject column labels in z-score reactivity data.")
parser.add_argument("--save",
action="store_true",
dest="save_path",
dest="save",
default=False,
help="Store results in a .tsv output file to be used in model training.")

Expand Down
2 changes: 1 addition & 1 deletion src/pepseqpred/core/labels/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def _find_pt_path(self, protein_id: str) -> Path:
def _load_embedding_length(self, protein_id: str) -> int:
"""Finds .pt path, loads embedding as tensor, and returns the length (number of amino acids)."""
pt_path = self._find_pt_path(protein_id)
embedding = torch.load(pt_path, map_location="cpu")
embedding = torch.load(pt_path, map_location="cpu", weights_only=True)
if not isinstance(embedding, torch.Tensor) or embedding.dim() != 2:
raise ValueError(
f"Expected 2D tensor embedding for '{protein_id}', got {type(embedding)}")
Expand Down
43 changes: 35 additions & 8 deletions src/pepseqpred/core/train/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from labels, predictions, and probabilities.
"""

from typing import Dict, Any
from typing import Dict, Any, Union, Sequence
import numpy as np
import torch
from sklearn.metrics import (precision_recall_fscore_support,
average_precision_score,
Expand All @@ -16,7 +17,16 @@
auc)


def compute_eval_metrics(y_true: torch.Tensor, y_pred: torch.Tensor, y_prob: torch.Tensor) -> Dict[str, Any]:
ArrayLike1D = Union[torch.Tensor, np.ndarray, Sequence[float], Sequence[int]]


def _to_numpy_1d(x: ArrayLike1D) -> np.ndarray:
if isinstance(x, torch.Tensor):
return x.detach().cpu().numpy().reshape(-1)
return np.asarray(x).reshape(-1)


def compute_eval_metrics(y_true: ArrayLike1D, y_pred: ArrayLike1D, y_prob: ArrayLike1D) -> Dict[str, Any]:
"""
Computes evaluation metrics given true labels, predicted labels, and predicted probabilities.

Expand All @@ -36,31 +46,48 @@ def compute_eval_metrics(y_true: torch.Tensor, y_pred: torch.Tensor, y_prob: tor
"""
metrics: Dict[str, Any] = {}

# calculate precesion, recall, f1, and mcc
y_true_np = _to_numpy_1d(y_true).astype(np.int64, copy=False)
y_pred_np = _to_numpy_1d(y_pred).astype(np.int64, copy=False)
y_prob_np = _to_numpy_1d(y_prob).astype(np.float64, copy=False)

# calculate precision, recall, and f1
precision, recall, f1, _ = precision_recall_fscore_support(
y_true, y_pred, average="binary", zero_division=0)
y_true_np, y_pred_np, average="binary", zero_division=0)
metrics["precision"] = float(precision)
metrics["recall"] = float(recall)
metrics["f1"] = float(f1)
metrics["mcc"] = matthews_corrcoef(y_true, y_pred)

# Avoid sklearn warning when both tensors contain only one shared label.
if np.unique(np.concatenate((y_true_np, y_pred_np))).size < 2:
metrics["mcc"] = 0.0
else:
metrics["mcc"] = float(matthews_corrcoef(y_true_np, y_pred_np))

has_both_classes = np.unique(y_true_np).size >= 2
if not has_both_classes:
only_class = int(y_true_np[0]) if y_true_np.size > 0 else 0
metrics["auc"] = float("nan")
metrics["pr_auc"] = 1.0 if only_class == 1 else 0.0
metrics["auc10"] = float("nan")
return metrics

# ROC AUC
try:
metrics["auc"] = float(roc_auc_score(y_true, y_prob))
metrics["auc"] = float(roc_auc_score(y_true_np, y_prob_np))

except Exception:
metrics["auc"] = float("nan")

# PR AUC
try:
metrics["pr_auc"] = float(average_precision_score(y_true, y_prob))
metrics["pr_auc"] = float(average_precision_score(y_true_np, y_prob_np))

except Exception:
metrics["pr_auc"] = float("nan")

# AUC10 calculation
try:
fpr, tpr, _ = roc_curve(y_true, y_prob)
fpr, tpr, _ = roc_curve(y_true_np, y_prob_np)
mask = fpr <= 0.10
if mask.sum() >= 2:
metrics["auc10"] = float(auc(fpr[mask], tpr[mask]) / 0.10)
Expand Down
12 changes: 11 additions & 1 deletion src/pepseqpred/core/train/weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,15 @@ def pos_weight_from_label_shards(label_shards: List[Path]) -> float:
f"{shard} missing class_stats (rebuild labels with --calc-pos-weight)"
)
total_pos += int(stats["pos_count"])
total_neg += int(stats["neg_counts"])
# Prefer the canonical key written by labels.builder, but support
# legacy/pluralized payloads for backwards compatibility.
if "neg_count" in stats:
total_neg += int(stats["neg_count"])
elif "neg_counts" in stats:
total_neg += int(stats["neg_counts"])
else:
raise ValueError(
f"{shard} class_stats missing negative count key "
"(expected 'neg_count' or 'neg_counts')"
)
return float(total_neg / max(1, total_pos))
36 changes: 36 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pathlib import Path
import pytest
import torch


@pytest.fixture
def training_artifacts(tmp_path: Path):
    """Build a minimal on-disk embedding dir and label shard for training tests.

    Writes four fake per-protein embeddings (6 residues x 4 dims) named
    ``<protein_id>-<family>.pt`` plus one label shard whose payload carries
    per-protein residue labels and aggregated ``class_stats`` counts.

    Returns:
        dict with ``embedding_dir`` (Path to the .pt embeddings) and
        ``label_shard`` (Path to the saved label payload).
    """
    embedding_dir = tmp_path / "emb"
    embedding_dir.mkdir(parents=True, exist_ok=True)
    shard_path = tmp_path / "labels_000.pt"

    proteins = [("P001", "111"), ("P002", "111"),
                ("P003", "222"), ("P004", "222")]
    label_map = {}
    pos_total = 0
    neg_total = 0

    for protein_id, family in proteins:
        # Fake embedding: 6 residues, 4 feature dims.
        embedding = torch.randn(6, 4, dtype=torch.float32)
        torch.save(embedding, embedding_dir / f"{protein_id}-{family}.pt")

        residue_labels = torch.tensor([1, 0, 0, 1, 0, 0], dtype=torch.float32)
        label_map[protein_id] = residue_labels
        pos_total += int((residue_labels == 1).sum().item())
        neg_total += int((residue_labels == 0).sum().item())

    torch.save(
        {
            "labels": label_map,
            "class_stats": {
                "pos_count": pos_total,
                "neg_count": neg_total
            }
        },
        shard_path,
    )

    return {"embedding_dir": embedding_dir, "label_shard": shard_path}
Loading
Loading