Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ CLAUDE.md
# Profiling results
traces/

# Trajectories results
tests/trajectories/results/

# uv
uv.lock

Expand Down
33 changes: 33 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,39 @@ implementation of a mathematical aggregator.
To deprecate some public functionality, make it raise a `DeprecationWarning`. A test should also be
added in `tests/unit/test_deprecations.py`, ensuring that this warning is issued.

## Trajectories

The `tests/trajectories/` directory contains scripts to generate and visualize optimization
trajectories using various aggregators on simple multi-objective problems. They require the `plot`
dependency group.

Available objective keys: `EWQ`, `CQF`, `CQF2`, `HQF`, `MN2`, `MN20`.

Available aggregator keys: `upgrad`, `mgda`, `cagrad`, `nashmtl`, `nashmtl20`, `graddrop`,
`imtl_g`, `aligned_mtl`, `dualproj`, `pcgrad`, `random`, `mean`.

**Step 1 — Optimize:** run the optimization for an objective and a selection of aggregators:
```bash
uv run python tests/trajectories/optimize.py EWQ upgrad mean mgda cagrad dualproj graddrop imtl_g aligned_mtl nashmtl random
```
This saves trajectory data under `tests/trajectories/results/` (gitignored).

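**Step 2 — Plot:** generate the plots from the saved trajectories (setting `MPLBACKEND=Agg` selects Matplotlib's non-interactive Agg backend, so no display is needed):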
```bash
export MPLBACKEND=Agg
uv run python tests/trajectories/plot_params.py EWQ
uv run python tests/trajectories/plot_values.py EWQ
uv run python tests/trajectories/plot_distance_to_pf.py EWQ
```

Replace `EWQ` with any other objective key. The three plot scripts produce PDFs saved to
`tests/trajectories/results/<objective>/`.

> [!NOTE]
> The plot scripts require a LaTeX installation for rendering. On Debian/Ubuntu, for example:
> `sudo apt-get install texlive-latex-extra texlive-fonts-recommended dvipng cm-super`


## Release

*This section is addressed to maintainers.*
Expand Down
Empty file.
183 changes: 183 additions & 0 deletions tests/trajectories/_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from math import cos, sin

import numpy as np
import torch

from torchjd._linalg import QuadprogProjector
from torchjd.aggregation import (
IMTLG,
MGDA,
AlignedMTL,
CAGrad,
DualProj,
GradDrop,
Mean,
NashMTL,
PCGrad,
Random,
UPGrad,
)
from trajectories._objectives import (
ConvexQuadraticForm,
ElementWiseQuadratic,
HomogenousQuadraticForm,
Multinorm,
QuadraticForm,
)

# Aggregator instances, keyed by the short names listed under "Available aggregator keys" in
# CONTRIBUTING.md.
# "upgrad" and "dualproj" are built with identical QuadprogProjector settings.
# NashMTL takes a fixed number of tasks, hence one entry for 2 tasks ("nashmtl") and one for 20
# tasks ("nashmtl20").
# NOTE(review): presumably "nashmtl20" is meant for the 20-objective "MN20" problem — confirm.
AGGREGATORS = {
"upgrad": UPGrad(projector=QuadprogProjector(reg_eps=1e-7, norm_eps=1e-9)),
"mgda": MGDA(),
"cagrad": CAGrad(c=0.5),
"nashmtl": NashMTL(n_tasks=2, optim_niter=1),
"nashmtl20": NashMTL(n_tasks=20, optim_niter=1),
"graddrop": GradDrop(),
"imtl_g": IMTLG(),
"aligned_mtl": AlignedMTL(),
"dualproj": DualProj(projector=QuadprogProjector(reg_eps=1e-7, norm_eps=1e-9)),
"pcgrad": PCGrad(),
"random": Random(),
"mean": Mean(),
}
# Default learning-rate multiplier for each aggregator key (same key set as AGGREGATORS).
# NOTE(review): presumably multiplied with the per-objective base learning rate by the
# optimization script — confirm against the consumer.
LR_MULTIPLIERS = {
"upgrad": 1.0,
"mgda": 2.0,
"cagrad": 1.0,
"nashmtl": 2.0,
"nashmtl20": 2.0,
"graddrop": 0.5,
"imtl_g": 1.0,
"aligned_mtl": 4.0,
"dualproj": 1.0,
"pcgrad": 0.5,
"random": 1.0,
"mean": 1.0,
}
# Some methods have optimal LRs that are very problem-specific. This allows overriding the LR
# per-problem.
# Layout: objective key -> {aggregator key -> multiplier}. Only the listed pairs have an override.
# NOTE(review): presumably an entry here replaces the LR_MULTIPLIERS value for that
# (objective, aggregator) pair — confirm against the consumer.
LR_MULTIPLIER_OVERRIDES = {
"HQF": {
"nashmtl": 20.0,
"imtl_g": 2.0,
},
"CQF": {"nashmtl": 0.5},
"CQF2": {"nashmtl": 0.5},
}
# Integer rank associated with each aggregator key. "nashmtl" and "nashmtl20" share rank 7, and
# "pcgrad" deliberately has no entry (see the comment at the end of the dict).
# NOTE(review): presumably used to order the aggregators in the plots — confirm in the plot
# scripts.
AGGREGATOR_ORDER = {
"upgrad": 9,
"mgda": 1,
"cagrad": 5,
"nashmtl": 7,
"nashmtl20": 7,
"graddrop": 3,
"imtl_g": 4,
"aligned_mtl": 8,
"dualproj": 2,
"random": 6,
"mean": 0,
# No location for PCGrad as it's equivalent to UPGrad with 2 objectives
}
# LaTeX display label for each aggregator key. Raw strings are used so that the backslashes of the
# LaTeX commands are kept as-is.
# "nashmtl" and "nashmtl20" share the same label, and "random" is labelled "RGW".
LATEX_NAMES = {
"upgrad": r"$\mathcal A_{\mathrm{UPGrad}}$ (ours)",
"mgda": r"$\mathcal A_{\mathrm{MGDA}}$",
"cagrad": r"$\mathcal A_{\mathrm{CAGrad}}$",
"nashmtl": r"$\mathcal A_{\mathrm{Nash-MTL}}$",
"nashmtl20": r"$\mathcal A_{\mathrm{Nash-MTL}}$",
"graddrop": r"$\mathcal A_{\mathrm{GradDrop}}$",
"imtl_g": r"$\mathcal A_{\mathrm{IMTL-G}}$",
"aligned_mtl": r"$\mathcal A_{\mathrm{Aligned-MTL}}$",
"dualproj": r"$\mathcal A_{\mathrm{DualProj}}$",
"pcgrad": r"$\mathcal A_{\mathrm{PCGrad}}$",
"random": r"$\mathcal A_{\mathrm{RGW}}$",
"mean": r"$\mathcal A_{\mathrm{Mean}}$",
}

# Sometimes we need to override the xlim and ylim of the value plot to zoom enough
# Layout: objective key -> {"xlim": (low, high), "ylim": (low, high)}. Only "CQF" and "CQF2" have
# an override, and both use the same limits.
PLOT_VALUES_LIMS = {
"CQF": {
"xlim": (-0.125, 2.625),
"ylim": (-0.425, 8.925),
},
"CQF2": {
"xlim": (-0.125, 2.625),
"ylim": (-0.425, 8.925),
},
}

# Angle (in radians) of the rotation matrices used to build the "CQF" objective.
THETA = np.pi / 16

# Objective instances, keyed by the names listed under "Available objective keys" in
# CONTRIBUTING.md.
# - "EWQ": element-wise quadratic with 2 dimensions.
# - "CQF": each B is a 2x2 rotation matrix (by +THETA for the first, -THETA for the second)
#   multiplied by a diagonal scaling matrix; ConvexQuadraticForm uses B @ B.T as the matrix of
#   each quadratic form.
# - "CQF2": quadratic forms given directly by their matrices, with the same us as "CQF".
# - "HQF": a single matrix A scaled by 1 and by 10.
# - "MN2" / "MN20": Multinorm with 2 and 20 coefficients; torch.arange(1, 21) yields the integers
#   1 to 20.
OBJECTIVES = {
"EWQ": ElementWiseQuadratic(2),
"CQF": ConvexQuadraticForm(
Bs=[
torch.tensor([[cos(THETA), -sin(THETA)], [sin(THETA), cos(THETA)]])
@ torch.diag(torch.tensor([1.0, 0.1])),
torch.tensor([[cos(THETA), sin(THETA)], [-sin(THETA), cos(THETA)]])
@ torch.diag(torch.tensor([torch.sqrt(torch.tensor(3.0)), 0.1])),
],
us=[torch.tensor([1.0, 0.0]), torch.tensor([-1.0, 0.0])],
),
"CQF2": QuadraticForm(
As=[torch.tensor([[1.0, 0.2], [0.2, 0.05]]), torch.tensor([[3.0, -0.6], [-0.6, 0.2]])],
us=[torch.tensor([1.0, 0.0]), torch.tensor([-1.0, 0.0])],
),
"HQF": HomogenousQuadraticForm(
A=torch.tensor([[2.0, -1.0], [-1.0, 2.0]]),
scales=torch.tensor([1.0, 10.0]),
us=[torch.tensor([1.0, 0.0]), torch.tensor([-10.0, 0.0])],
),
"MN2": Multinorm(torch.tensor([1.0, 10.0])),
"MN20": Multinorm(torch.arange(1, 21)),
}
# Base learning rate for each objective key.
# NOTE(review): presumably scaled by the aggregator-specific learning-rate multiplier before
# use — confirm against the optimization script.
BASE_LEARNING_RATES = {
"EWQ": 0.075,
"CQF": 0.125,
"CQF2": 0.125,
"HQF": 0.005,
"MN2": 0.02,
"MN20": 0.005,
}
# Initial points for each objective key. Each inner list is one point; points have 2 coordinates,
# except for "MN20" whose single point has 20 coordinates.
# NOTE(review): presumably one trajectory is generated per initial point — confirm against the
# optimization script.
INITIAL_POINTS = {
"EWQ": [
[3.0, -2.0],
[0.0, -3.0],
[-4.0, 4.0],
[-3.0, 4.0],
[-3.5, -0.75],
],
"CQF": [
[0.5, 0.5],
[-1.0, 7.0],
[0.0, 0.0],
[1.0, 6.0],
],
"CQF2": [
[0.5, 0.5],
[-0.3, 7.0],
[0.0, 0.0],
],
"HQF": [
[-6.0, 4.0],
[-3.0, -1.5],
[1.5, 2.0],
[2.5, 5.5],
],
"MN2": [
[0.0, 0.0],
[-5.0, 5.0],
[10.0, 5.0],
[10.0, 0.0],
[20.0, 0.0],
],
"MN20": [
[0.0] * 20,
],
}
# Iteration count for each objective key.
# NOTE(review): presumably the number of optimization steps per trajectory — confirm against the
# optimization script.
N_ITERS = {
"EWQ": 50,
"CQF": 200,
"CQF2": 200,
"HQF": 100,
"MN2": 50,
"MN20": 500,
}
162 changes: 162 additions & 0 deletions tests/trajectories/_objectives.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from abc import ABC, abstractmethod

import torch
from torch import Tensor


class Objective(ABC):
    """
    Abstract vector-valued objective function.

    An instance records its number of parameters (``n_params``) and its number of values
    (``n_values``). Concrete subclasses implement the evaluation (``__call__``) and the Jacobian.
    """

    def __init__(self, n_params: int, n_values: int) -> None:
        self.n_params, self.n_values = n_params, n_values

    @abstractmethod
    def __call__(self, x: Tensor) -> Tensor:
        """Evaluate the objective function at x. The result has to be a vector."""

    @abstractmethod
    def jacobian(self, x: Tensor) -> Tensor:
        """
        Evaluate the Jacobian of the objective function at x. The result is a matrix of shape
        [n_values, n_params].
        """

    def __str__(self) -> str:
        """Return the name of the concrete class."""
        class_name = self.__class__.__name__
        return class_name

    def __repr__(self) -> str:
        """Return the name of the concrete class followed by ``n_values`` in parentheses."""
        class_name = self.__class__.__name__
        return f"{class_name}({self.n_values})"


class WithSPSMappingMixin(ABC):
    """Mixin for objectives that can provide their strong Pareto stationary (SPS) mapping."""

    class SPSMapping(ABC):
        """Abstract callable turning a weight vector into a point."""

        @abstractmethod
        def __call__(self, w: Tensor) -> Tensor:
            """
            Return the strongly Pareto stationary point corresponding to w, a vector whose
            coordinates are all (strictly) positive.
            """

    @property
    @abstractmethod
    def sps_mapping(self) -> "WithSPSMappingMixin.SPSMapping":
        """The SPS mapping of the objective; concrete classes must provide it."""


class QuadraticForm(Objective, WithSPSMappingMixin):
    """
    Multi-objective function whose i-th value is ``(x - us[i]) @ As[i] @ (x - us[i])``.

    The matrices do not have to be symmetric: the Jacobian and the SPS mapping are computed from
    ``A + A.T``, which is what differentiating the quadratic form yields. For a symmetric ``A``
    this is simply ``2 * A``.
    """

    def __init__(self, As: list[Tensor], us: list[Tensor]) -> None:
        """
        :param As: One square matrix per objective value.
        :param us: One center per objective value; the length of the first one defines the number
            of parameters.
        :raises ValueError: If ``As`` and ``us`` have different lengths, or if they are empty.
        """
        if len(As) != len(us):
            raise ValueError("As and us must have the same length.")

        if len(As) < 1:
            raise ValueError("As and us must have at least one element.")

        super().__init__(n_params=len(us[0]), n_values=len(As))
        # Note that if A is not PSD, the objective is not convex.
        self.As = As
        self.us = us

    def __call__(self, x: Tensor) -> Tensor:
        """Return the vector of the quadratic forms evaluated at x."""
        objective_values = [self.quad(x, A, u) for A, u in zip(self.As, self.us, strict=False)]
        return torch.stack(objective_values)

    def jacobian(self, x: Tensor) -> Tensor:
        """Return the Jacobian at x: row i is the gradient of the i-th quadratic form."""
        # The gradient of (x - u)^T A (x - u) is (A + A^T)(x - u). Since A + A^T is symmetric, it
        # can be applied from the right. Using A + A^T rather than 2 * A keeps the result correct
        # when A is not symmetric.
        gradients = [(x - u) @ (A + A.T) for A, u in zip(self.As, self.us, strict=False)]
        return torch.vstack(gradients)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(As={self.As}, us={self.us})"

    @staticmethod
    def quad(x: Tensor, A: Tensor, u: Tensor) -> Tensor:
        """Return the scalar ``(x - u) @ A @ (x - u)``."""
        x_minus_u = x - u
        return x_minus_u @ A @ x_minus_u

    class SPSMapping(WithSPSMappingMixin.SPSMapping):
        """SPS mapping of a quadratic form objective, obtained by solving a linear system."""

        def __init__(self, As: list[Tensor], us: list[Tensor]) -> None:
            self.As = As
            self.us = us

        def __call__(self, w: Tensor) -> Tensor:
            """
            Return a solution x of ``G x = b``, with ``G = sum_i w_i S_i`` and
            ``b = sum_i w_i S_i u_i``, where ``S_i`` is the symmetric part of ``As[i]``. This is
            the condition ``sum_i w_i * grad f_i(x) = 0`` (up to a factor 2).
            """
            # Only the symmetric part of each matrix contributes to the gradient of its quadratic
            # form. For a symmetric matrix, the symmetric part is the matrix itself.
            sym_As = [(A + A.T) / 2 for A in self.As]
            G = torch.stack([weight * S for weight, S in zip(w, sym_As, strict=False)]).sum(dim=0)
            b = torch.stack(
                [weight * S @ u for weight, S, u in zip(w, sym_As, self.us, strict=False)]
            ).sum(dim=0)
            # A least-squares solve is used so that a solution is still returned if G is singular.
            return torch.linalg.lstsq(G, b, driver="gelsd").solution

    @property
    def sps_mapping(self) -> "QuadraticForm.SPSMapping":
        """A new SPS mapping built from the matrices and centers of this objective."""
        return self.SPSMapping(self.As, self.us)


class HomogenousQuadraticForm(QuadraticForm):
    """Quadratic form objective whose matrices are all scaled versions of a single matrix A."""

    def __init__(self, A: Tensor, scales: Tensor, us: list[Tensor]) -> None:
        """
        :param A: The matrix shared by all quadratic forms.
        :param scales: One scaling factor per objective value; the i-th matrix is ``A * scales[i]``.
        :param us: One center per objective value.
        """
        self.A, self.scales = A, scales
        scaled_matrices = [A * factor for factor in scales]
        super().__init__(As=scaled_matrices, us=us)

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(A={self.A}, scales={self.scales}, us={self.us})"


class ConvexQuadraticForm(QuadraticForm):
    """Quadratic form objective whose i-th matrix is ``Bs[i] @ Bs[i].T``."""

    def __init__(self, Bs: list[Tensor], us: list[Tensor]) -> None:
        """
        :param Bs: One factor matrix per objective value.
        :param us: One center per objective value.
        """
        self.Bs = Bs
        gram_matrices = [factor @ factor.T for factor in Bs]
        super().__init__(As=gram_matrices, us=us)

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(Bs={self.Bs}, us={self.us})"


class ElementWiseQuadratic(Objective, WithSPSMappingMixin):
    """Objective whose i-th value is the square of the i-th coordinate of x."""

    def __init__(self, n_dim: int) -> None:
        """
        :param n_dim: The dimension of x, which is also the number of objective values.
        """
        super().__init__(n_params=n_dim, n_values=n_dim)

    def __call__(self, x: Tensor) -> Tensor:
        """
        Return the element-wise square of x.

        :raises ValueError: If the length of x differs from the number of values.
        """
        if len(x) != self.n_values:
            raise ValueError("x must have the same length as the number of values.")
        return x**2

    def jacobian(self, x: Tensor) -> Tensor:
        """
        Return the Jacobian at x, i.e. the diagonal matrix whose diagonal is ``2 * x``.

        :raises ValueError: If the length of x differs from the number of values.
        """
        if len(x) != self.n_values:
            raise ValueError("x must have the same length as the number of values.")
        # Built from the whole vector, so that this is valid for any dimension and not only 2.
        return torch.diag(2 * x)

    class SPSMapping(WithSPSMappingMixin.SPSMapping):
        """SPS mapping that returns the zero vector whatever the weights are."""

        def __init__(self, n_values: int) -> None:
            self.n_values = n_values

        def __call__(self, w: Tensor) -> Tensor:  # noqa: ARG002
            """Return the zero vector of length ``n_values``; w is not used."""
            return torch.zeros(self.n_values)

    @property
    def sps_mapping(self) -> "ElementWiseQuadratic.SPSMapping":
        """A new SPS mapping for this objective."""
        return self.SPSMapping(self.n_values)


class Multinorm(Objective, WithSPSMappingMixin):
    """
    Objective whose i-th value is ``f_i(x) = a_i * || x - a_i * e_i ||²``, where ``e_i`` is the
    i-th vector of the canonical basis.
    """

    def __init__(self, a: Tensor) -> None:
        """
        :param a: The vector of coefficients. Its length is both the number of parameters and the
            number of values.
        """
        n = len(a)
        super().__init__(n_params=n, n_values=n)
        self.a = a

    def __call__(self, x: Tensor) -> Tensor:
        """
        Return the vector of the objective values at x.

        :raises ValueError: If the length of x differs from the number of values.
        """
        if len(x) != self.n_values:
            raise ValueError("x must have the same length as the number of values.")

        # f_i(x) = a_i * || x - a_i * e_i ||²
        return self.a * torch.norm(x.expand(len(x), len(x)) - torch.diag(self.a), dim=1) ** 2

    def jacobian(self, x: Tensor) -> Tensor:
        """Return the Jacobian at x: row i is the gradient ``2 * a_i * (x - a_i * e_i)``."""
        n = len(x)
        # Row i of `offsets` is x - a_i * e_i.
        offsets = x.expand(n, n) - torch.diag(self.a)
        # The coefficient a_i has to scale the whole row i. `a` is thus reshaped into a column:
        # multiplying by the 1-D `a` directly would broadcast it along the columns and scale
        # entry [i, j] by a_j instead of a_i.
        return 2 * self.a.unsqueeze(1) * offsets

    class SPSMapping(WithSPSMappingMixin.SPSMapping):
        """SPS mapping of the multinorm objective."""

        def __init__(self, a: Tensor) -> None:
            self.a = a

        def __call__(self, w: Tensor) -> Tensor:
            """Return the element-wise product of w and a."""
            # NOTE(review): solving sum_i w_i * grad f_i(x) = 0 gives x_j = a_j * l_j with
            # l_j = w_j * a_j / sum_i(w_i * a_i), so w * a appears to be stationary only when the
            # coordinates of w sum to one — confirm that callers pass normalized weights.
            return w * self.a

    @property
    def sps_mapping(self) -> "Multinorm.SPSMapping":
        """A new SPS mapping built from the coefficients of this objective."""
        return self.SPSMapping(self.a)
Loading
Loading