From c84d01c529ffab95d3296e41936b6e5749593f41 Mon Sep 17 00:00:00 2001 From: Daniel Roth <118249021+da-roth@users.noreply.github.com> Date: Fri, 12 Dec 2025 08:58:23 +0100 Subject: [PATCH 001/122] forge integration --- .github/workflows/benchmark-baseline.yaml | 162 + .github/workflows/benchmark.yaml | 180 + .github/workflows/ci.yaml | 481 +- .gitignore | 405 + CMakeLists.txt | 130 +- src/qlrisks-forge/ForgeBackend.hpp | 278 + src/qlrisks-forge/ForgeBackendAVX.hpp | 322 + src/qlrisks-forge/ForgeBackendAVX_CAPI.hpp | 287 + src/qlrisks-forge/ForgeBackendCAPI.hpp | 272 + src/qlrisks-forge/ForgeBackends.hpp | 54 + test-suite/CMakeLists.txt | 62 +- test-suite/forgebackend_xad.cpp | 298 + test-suite/jit_xad.cpp | 345 + test-suite/quantlibrisks_benchmark.cpp | 20 + test-suite/swaption_benchmark.cpp | 2517 +++++++ test-suite/swaption_benchmark_baseline.cpp | 993 +++ test-suite/swaption_jit_pipeline_xad.cpp | 7938 ++++++++++++++++++++ 17 files changed, 14409 insertions(+), 335 deletions(-) create mode 100644 .github/workflows/benchmark-baseline.yaml create mode 100644 .github/workflows/benchmark.yaml create mode 100644 .gitignore create mode 100644 src/qlrisks-forge/ForgeBackend.hpp create mode 100644 src/qlrisks-forge/ForgeBackendAVX.hpp create mode 100644 src/qlrisks-forge/ForgeBackendAVX_CAPI.hpp create mode 100644 src/qlrisks-forge/ForgeBackendCAPI.hpp create mode 100644 src/qlrisks-forge/ForgeBackends.hpp create mode 100644 test-suite/forgebackend_xad.cpp create mode 100644 test-suite/jit_xad.cpp create mode 100644 test-suite/quantlibrisks_benchmark.cpp create mode 100644 test-suite/swaption_benchmark.cpp create mode 100644 test-suite/swaption_benchmark_baseline.cpp create mode 100644 test-suite/swaption_jit_pipeline_xad.cpp diff --git a/.github/workflows/benchmark-baseline.yaml b/.github/workflows/benchmark-baseline.yaml new file mode 100644 index 0000000..34bd10a --- /dev/null +++ b/.github/workflows/benchmark-baseline.yaml @@ -0,0 +1,162 @@ 
+############################################################################## +# +# QuantLib-Risks Baseline Benchmark Workflow +# +# Builds QuantLib with original XAD (no JIT/Forge) and runs the baseline +# benchmark for comparison with JIT-accelerated versions. +# +# This uses: +# - Original XAD from auto-differentiation/xad +# - Original QuantLib-Risks-Cpp from auto-differentiation/QuantLib-Risks-Cpp +# - No Forge dependencies +# +# Copyright (C) 2025 The QuantLib-Risks-Cpp-Forge Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# +############################################################################## + +name: Benchmark (Baseline) + +on: + push: + pull_request: + workflow_dispatch: + inputs: + ql_repo: + description: QuantLib repository in <owner>/<repo> format + required: true + default: lballabio/QuantLib + ql_branch: + description: Branch or tag for QuantLib repository + required: true + default: master + xad_repo: + description: XAD repository in <owner>/<repo> format + required: true + default: auto-differentiation/xad + xad_branch: + description: Branch or tag for XAD repository + required: true + default: main + qlrisks_repo: + description: QuantLib-Risks-Cpp repository in <owner>/<repo> format + required: true + default: auto-differentiation/QuantLib-Risks-Cpp + qlrisks_branch: + description: Branch or tag for QuantLib-Risks-Cpp repository + required: true + default: main + +env: + ql_repo: ${{ github.event.inputs.ql_repo || 'lballabio/QuantLib' }} + ql_branch: ${{ github.event.inputs.ql_branch || 'master' }} + xad_repo: ${{ github.event.inputs.xad_repo || 'auto-differentiation/xad' }} + xad_branch: ${{ github.event.inputs.xad_branch || 'main' }} + qlrisks_repo: ${{ github.event.inputs.qlrisks_repo || 'auto-differentiation/QuantLib-Risks-Cpp' }} + qlrisks_branch: ${{ github.event.inputs.qlrisks_branch || 'main' }} + +jobs: + linux-baseline: + runs-on: ubuntu-latest + container: ghcr.io/lballabio/quantlib-devenv:rolling + + name: Linux Baseline (Original XAD) + + steps: + - 
name: Checkout QuantLib + uses: actions/checkout@v4 + with: + repository: ${{ env.ql_repo }} + ref: ${{ env.ql_branch }} + path: QuantLib + + - name: Checkout XAD (Original) + uses: actions/checkout@v4 + with: + repository: ${{ env.xad_repo }} + ref: ${{ env.xad_branch }} + path: xad + + - name: Checkout QuantLib-Risks-Cpp (Original) + uses: actions/checkout@v4 + with: + repository: ${{ env.qlrisks_repo }} + ref: ${{ env.qlrisks_branch }} + path: QuantLib-Risks-Cpp + + - name: Checkout QuantLib-Risks-Cpp-Forge (for baseline benchmark) + uses: actions/checkout@v4 + with: + path: QuantLib-Risks-Cpp-Forge + + - name: Copy baseline benchmark to QuantLib-Risks-Cpp + run: | + # Copy benchmark files from Forge repo to original QuantLib-Risks-Cpp + cp QuantLib-Risks-Cpp-Forge/test-suite/swaption_benchmark_baseline.cpp \ + QuantLib-Risks-Cpp/test-suite/ + cp QuantLib-Risks-Cpp-Forge/test-suite/quantlibrisks_benchmark.cpp \ + QuantLib-Risks-Cpp/test-suite/ + cp QuantLib-Risks-Cpp-Forge/test-suite/utilities_xad.cpp \ + QuantLib-Risks-Cpp/test-suite/ + cp QuantLib-Risks-Cpp-Forge/test-suite/utilities_xad.hpp \ + QuantLib-Risks-Cpp/test-suite/ + # Add baseline benchmark to CMakeLists.txt + cat >> QuantLib-Risks-Cpp/test-suite/CMakeLists.txt << 'EOF' + +# Baseline benchmark (no Forge/JIT) - patched in from QuantLib-Risks-Cpp-Forge +add_executable(quantlib-risks-benchmark-baseline + quantlibrisks_benchmark.cpp + swaption_benchmark_baseline.cpp + utilities_xad.cpp + utilities_xad.hpp +) +set_target_properties(quantlib-risks-benchmark-baseline PROPERTIES OUTPUT_NAME "quantlib-risks-benchmark-baseline") +if (NOT Boost_USE_STATIC_LIBS) + target_compile_definitions(quantlib-risks-benchmark-baseline PRIVATE BOOST_ALL_DYN_LINK) +endif() +target_link_libraries(quantlib-risks-benchmark-baseline PRIVATE + QuantLib-Risks + ql_library + ${QL_THREAD_LIBRARIES}) +EOF + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.12 + with: + key: linux-baseline + max-size: 650M + + - name: Setup + 
run: | + apt-get update && apt-get install -y ninja-build ccache + + - name: Configure QuantLib with XAD (Original) + run: | + cd QuantLib + mkdir build + cd build + cmake -G Ninja -DBOOST_ROOT=/usr \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DXAD_WARNINGS_PARANOID=OFF \ + -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad;$(pwd)/../../QuantLib-Risks-Cpp" \ + -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ + -DQL_NULL_AS_FUNCTIONS=ON \ + -DQL_BUILD_TEST_SUITE=OFF \ + -DQL_BUILD_EXAMPLES=OFF \ + -DQL_BUILD_BENCHMARK=OFF \ + -DQLRISKS_DISABLE_AAD=OFF \ + -DQLRISKS_BUILD_TEST_SUITE=ON \ + .. + + - name: Build + run: | + cd QuantLib/build + cmake --build . --target quantlib-risks-benchmark-baseline + + - name: Run Baseline Benchmark + run: | + cd QuantLib/build + ./QuantLib-Risks-Cpp/test-suite/quantlib-risks-benchmark-baseline --log_level=message diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml new file mode 100644 index 0000000..f4ee6bc --- /dev/null +++ b/.github/workflows/benchmark.yaml @@ -0,0 +1,180 @@ +############################################################################## +# +# QuantLib-Risks-Cpp-Forge Benchmark Workflow +# +# Builds QuantLib with XAD-JIT (Forge-enabled) and runs the benchmark suite. 
+# +# Copyright (C) 2025 The QuantLib-Risks-Cpp-Forge Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# +############################################################################## + +name: Benchmark + +on: + push: + pull_request: + workflow_dispatch: + inputs: + ql_repo: + description: QuantLib repository in <owner>/<repo> format + required: true + default: lballabio/QuantLib + ql_branch: + description: Branch or tag for QuantLib repository + required: true + default: master + xad_repo: + description: XAD-JIT repository in <owner>/<repo> format + required: true + default: da-roth/xad-jit + xad_branch: + description: Branch or tag for XAD-JIT repository + required: true + default: main + forge_repo: + description: Forge repository in <owner>/<repo> format + required: true + default: da-roth/forge + forge_branch: + description: Branch or tag for Forge repository + required: true + default: main + +env: + ql_repo: ${{ github.event.inputs.ql_repo || 'lballabio/QuantLib' }} + ql_branch: ${{ github.event.inputs.ql_branch || 'master' }} + xad_repo: ${{ github.event.inputs.xad_repo || 'da-roth/xad-jit' }} + xad_branch: ${{ github.event.inputs.xad_branch || 'main' }} + forge_repo: ${{ github.event.inputs.forge_repo || 'da-roth/forge' }} + forge_branch: ${{ github.event.inputs.forge_branch || 'main' }} + +jobs: + linux-benchmark: + runs-on: ubuntu-latest + container: ghcr.io/lballabio/quantlib-devenv:rolling + strategy: + fail-fast: false + matrix: + capi: [off, on] + + name: Linux (${{ matrix.capi == 'on' && 'C API' || 'C++ API' }}) + + steps: + - name: Checkout QuantLib + uses: actions/checkout@v4 + with: + repository: ${{ env.ql_repo }} + ref: ${{ env.ql_branch }} + path: QuantLib + + - name: Checkout XAD-JIT + uses: actions/checkout@v4 + with: + repository: ${{ env.xad_repo }} + ref: ${{ env.xad_branch }} + path: xad-jit + + - name: Checkout Forge + uses: actions/checkout@v4 + with: + repository: ${{ env.forge_repo }} + ref: ${{ env.forge_branch }} + path: forge + + - name: Checkout 
QuantLib-Risks-Cpp-Forge + uses: actions/checkout@v4 + with: + path: QuantLib-Risks-Cpp-Forge + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.12 + with: + key: linux-benchmark-${{ matrix.capi }} + max-size: 650M + + - name: Setup + run: | + apt-get update && apt-get install -y ninja-build ccache + + - name: Build Forge (C++ API) + if: matrix.capi == 'off' + run: | + cd forge + cmake -B build -S tools/packaging \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/../install + cmake --build build --config Release + cmake --install build --config Release + + - name: Build Forge (C API) + if: matrix.capi == 'on' + run: | + cd forge + cmake -B build -S tools/capi \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/../install + cmake --build build --config Release + cmake --install build --config Release + + - name: Configure QuantLib with XAD-JIT + Forge (C++ API) + if: matrix.capi == 'off' + run: | + cd QuantLib + mkdir build + cd build + cmake -G Ninja -DBOOST_ROOT=/usr \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DXAD_WARNINGS_PARANOID=OFF \ + -DCMAKE_PREFIX_PATH=$(pwd)/../../install \ + -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad-jit;$(pwd)/../../QuantLib-Risks-Cpp-Forge" \ + -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ + -DQL_NULL_AS_FUNCTIONS=ON \ + -DQL_BUILD_TEST_SUITE=OFF \ + -DQL_BUILD_EXAMPLES=OFF \ + -DQL_BUILD_BENCHMARK=OFF \ + -DQLRISKS_DISABLE_AAD=OFF \ + -DQLRISKS_BUILD_TEST_SUITE=OFF \ + -DQLRISKS_BUILD_BENCHMARK=ON \ + -DQLRISKS_ENABLE_FORGE_TESTS=ON \ + .. 
+ + - name: Configure QuantLib with XAD-JIT + Forge (C API) + if: matrix.capi == 'on' + run: | + cd QuantLib + mkdir build + cd build + cmake -G Ninja -DBOOST_ROOT=/usr \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DXAD_WARNINGS_PARANOID=OFF \ + -DCMAKE_PREFIX_PATH=$(pwd)/../../install \ + -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad-jit;$(pwd)/../../QuantLib-Risks-Cpp-Forge" \ + -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ + -DQL_NULL_AS_FUNCTIONS=ON \ + -DQL_BUILD_TEST_SUITE=OFF \ + -DQL_BUILD_EXAMPLES=OFF \ + -DQL_BUILD_BENCHMARK=OFF \ + -DQLRISKS_DISABLE_AAD=OFF \ + -DQLRISKS_USE_FORGE_CAPI=ON \ + -DQLRISKS_BUILD_TEST_SUITE=OFF \ + -DQLRISKS_BUILD_BENCHMARK=ON \ + -DQLRISKS_ENABLE_FORGE_TESTS=ON \ + .. + + - name: Build + run: | + cd QuantLib/build + cmake --build . + + - name: Run Benchmark + run: | + cd QuantLib/build + ./QuantLib-Risks-Cpp-Forge/test-suite/quantlib-risks-benchmark --log_level=message diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 54af9a0..aacfd53 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,18 +1,30 @@ -# Workflow to run against the corresponding QuantLib release, -# testing if the build and tests are working fine -# Note: In addition to pushes/pull requests, this workflow -# can also be executed manually, and the repositories / branches -# for QuantLib and XAD can be provided in this case. +############################################################################## +# +# QuantLib-Risks-Cpp-Forge CI Workflow +# +# Builds QuantLib with XAD-JIT (Forge-enabled) and runs the QuantLib-Risks +# test suite including Forge JIT tests. +# +# This workflow: +# 1. Builds Forge as a pre-built package (isolates AVX2 flags) +# 2. Configures QuantLib with XAD-JIT + QuantLib-Risks-Cpp-Forge +# 3. 
Runs the quantlib-risks-test-suite with Forge tests enabled +# +# Copyright (C) 2025 The QuantLib-Risks-Cpp-Forge Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# +############################################################################## + name: CI -on: - repository_dispatch: - types: [xad-ci-trigger] + +on: push: pull_request: workflow_dispatch: inputs: ql_repo: - description: Quantlib repository in <owner>/<repo> format + description: QuantLib repository in <owner>/<repo> format required: true default: lballabio/QuantLib ql_branch: @@ -20,347 +32,154 @@ on: required: true default: master xad_repo: - description: XAD repository in <owner>/<repo> format + description: XAD-JIT repository in <owner>/<repo> format required: true - default: auto-differentiation/xad + default: da-roth/xad-jit xad_branch: - description: Branch or tag for XAD repository + description: Branch or tag for XAD-JIT repository required: true default: main - schedule: - - cron: '02 5 * * *' # 5:02 every day + forge_repo: + description: Forge repository in <owner>/<repo> format + required: true + default: da-roth/forge + forge_branch: + description: Branch or tag for Forge repository + required: true + default: main + env: ql_repo: ${{ github.event.inputs.ql_repo || 'lballabio/QuantLib' }} ql_branch: ${{ github.event.inputs.ql_branch || 'master' }} - xad_repo: ${{ github.event.inputs.xad_repo || 'auto-differentiation/xad' }} + xad_repo: ${{ github.event.inputs.xad_repo || 'da-roth/xad-jit' }} xad_branch: ${{ github.event.inputs.xad_branch || 'main' }} -jobs: + forge_repo: ${{ github.event.inputs.forge_repo || 'da-roth/forge' }} + forge_branch: ${{ github.event.inputs.forge_branch || 'main' }} - xad-linux: +jobs: + linux-forge: strategy: fail-fast: false matrix: - disable_aad: ["ON", "OFF"] - cxx: ["17", "20"] + build_type: ["Release", "Debug"] + capi: [off, on] runs-on: ubuntu-latest container: ghcr.io/lballabio/quantlib-devenv:rolling + + name: Linux ${{ matrix.build_type }} (${{ matrix.capi == 'on' && 'C API' || 'C++ API' }}) + steps: - - 
uses: actions/checkout@v4 - with: - repository: ${{ env.ql_repo }} - ref: ${{ env.ql_branch }} - path: QuantLib - - uses: actions/checkout@v4 - with: - repository: ${{ env.xad_repo }} - ref: ${{ env.xad_branch }} - path: xad - - uses: actions/checkout@v4 - with: - path: QuantLib-Risks-Cpp - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.12 - with: - key: linux-${{ matrix.disable_aad }} - max-size: 650M - - name: Setup - run: | - apt-get update \ - && apt install -y ccache ninja-build - - name: Configure - run: | - rm -rf ${{ github.workspace }}/install - cd QuantLib - mkdir build - cd build - cmake -G Ninja -DBOOST_ROOT=/usr \ - -DCMAKE_CXX_STANDARD=${{ matrix.cxx }} \ - -DQLRISKS_DISABLE_AAD=${{ matrix.disable_aad }} \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad;$(pwd)/../../QuantLib-Risks-Cpp" \ - -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ - -DQL_NULL_AS_FUNCTIONS=ON \ - -DCMAKE_INSTALL_PREFIX=$(pwd)/../../install \ - .. - - name: Compile - run: | - cd QuantLib/build - cmake --build . - - name: Test QuantLib - run: | - cd QuantLib/build - ./test-suite/quantlib-test-suite --log_level=message - - name: Test QuantLib-Risks - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - cd QuantLib/build - ./QuantLib-Risks-Cpp/test-suite/quantlib-risks-test-suite --log_level=message - - name: Install - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - cd QuantLib/build - cmake --install . 
- - name: Test Install - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - mkdir installtest - cp QuantLib-Risks-Cpp/Examples/AdjointSwap/AdjointSwapXAD.cpp installtest - cd installtest - echo "cmake_minimum_required(VERSION 3.15.2)" > CMakeLists.txt - echo "project(QlTest LANGUAGES CXX)" >> CMakeLists.txt - echo "find_package(QuantLib-Risks REQUIRED)" >> CMakeLists.txt - echo "add_executable(AdjointSwapXAD AdjointSwapXAD.cpp)" >> CMakeLists.txt - echo "target_link_libraries(AdjointSwapXAD PRIVATE QuantLib::QuantLib)" >> CMakeLists.txt - echo "target_compile_features(AdjointSwapXAD PUBLIC cxx_std_17)" >> CMakeLists.txt - mkdir build - cd build - cmake -G Ninja -DBOOST_ROOT=/usr \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_PREFIX_PATH=$(pwd)/../../install \ - .. - cmake --build . - ./AdjointSwapXAD + - name: Checkout QuantLib + uses: actions/checkout@v4 + with: + repository: ${{ env.ql_repo }} + ref: ${{ env.ql_branch }} + path: QuantLib + - name: Checkout XAD-JIT + uses: actions/checkout@v4 + with: + repository: ${{ env.xad_repo }} + ref: ${{ env.xad_branch }} + path: xad-jit + - name: Checkout Forge + uses: actions/checkout@v4 + with: + repository: ${{ env.forge_repo }} + ref: ${{ env.forge_branch }} + path: forge + - name: Checkout QuantLib-Risks-Cpp-Forge + uses: actions/checkout@v4 + with: + path: QuantLib-Risks-Cpp-Forge - xad-win: - strategy: - fail-fast: false - matrix: - disable_aad: ["ON", "OFF"] - cxx: ["17", "20"] - runs-on: windows-2022 - env: - vsvarsall: C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat - steps: - - uses: actions/checkout@v3 - with: - repository: ${{ env.ql_repo }} - ref: ${{ env.ql_branch }} - path: QuantLib - - uses: actions/checkout@v3 - with: - repository: ${{ env.xad_repo }} - ref: ${{ env.xad_branch }} - path: xad - - uses: actions/checkout@v3 - with: - path: QuantLib-Risks-Cpp - - name: sccache - uses: hendrikmuhs/ccache-action@v1.2.12 - with: - key: windows-${{ matrix.disable_aad }} - 
variant: sccache - max-size: 650M - - name: Setup - run: | - choco install -y ninja - $Url = "https://downloads.sourceforge.net/project/boost/boost-binaries/1.84.0/boost_1_84_0-msvc-14.3-64.exe" - (New-Object System.Net.WebClient).DownloadFile($Url, "$RUNNER_TEMP\boost.exe") - Start-Process -Wait -FilePath "$RUNNER_TEMP\boost.exe" "/SILENT","/SP-","/SUPPRESSMSGBOXES","/DIR=C:\local\boost" - - name: Configure - env: - BOOST_ROOT: C:\local\boost - shell: cmd - run: | - cd QuantLib - mkdir build - cd build - call "${{ env.vsvarsall }}" amd64 - cmake .. -G Ninja -DQLRISKS_DISABLE_AAD=${{ matrix.disable_aad }} ^ - -DCMAKE_CXX_STANDARD=${{ matrix.cxx }} ^ - -DCMAKE_CXX_COMPILER_LAUNCHER=sccache ^ - -DCMAKE_BUILD_TYPE=Release ^ - -DQL_EXTERNAL_SUBDIRECTORIES="${{ github.workspace }}/xad;${{ github.workspace }}/QuantLib-Risks-Cpp" ^ - -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks ^ - -DQL_NULL_AS_FUNCTIONS=ON ^ - -DXAD_STATIC_MSVC_RUNTIME=ON ^ - -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install - - name: Build - shell: cmd - run: | - cd QuantLib\build - call "${{ env.vsvarsall }}" amd64 - cmake --build . - - name: Test QuantLib - shell: cmd - run: | - cd QuantLib\build - call "${{ env.vsvarsall }}" amd64 - .\test-suite\quantlib-test-suite --log_level=message - - name: Test QuantLib-Risks - if: ${{ matrix.disable_aad == 'OFF' }} - shell: cmd - run: | - cd QuantLib\build - call "${{ env.vsvarsall }}" amd64 - .\QuantLib-Risks-Cpp\test-suite\quantlib-risks-test-suite --log_level=message - - name: Install - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - cd QuantLib/build - cmake --install . 
- - name: Test Install - if: ${{ matrix.disable_aad == 'OFF' }} - env: - BOOST_ROOT: C:\local\boost - shell: cmd - run: | - mkdir installtest - copy QuantLib-Risks-Cpp\Examples\AdjointSwap\AdjointSwapXAD.cpp installtest - cd installtest - echo cmake_minimum_required(VERSION 3.15.2) > CMakeLists.txt - echo project(QlTest LANGUAGES CXX) >> CMakeLists.txt - echo find_package(QuantLib-Risks REQUIRED) >> CMakeLists.txt - echo add_executable(AdjointSwapXAD AdjointSwapXAD.cpp) >> CMakeLists.txt - echo target_link_libraries(AdjointSwapXAD PRIVATE QuantLib::QuantLib) >> CMakeLists.txt - echo set_target_properties(AdjointSwapXAD PROPERTIES MSVC_RUNTIME_LIBRARY MultiThreaded) >> CMakeLists.txt - echo target_compile_features(AdjointSwapXAD PUBLIC cxx_std_17) >> CMakeLists.txt - mkdir build - cd build - call "${{ env.vsvarsall }}" amd64 - cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{ github.workspace }}/install .. - cmake --build . - AdjointSwapXAD.exe + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.12 + with: + key: linux-forge-${{ matrix.build_type }}-${{ matrix.capi }} + max-size: 650M + - name: Setup + run: | + apt-get update && apt-get install -y ninja-build ccache - xad-macos: - strategy: - fail-fast: false - matrix: - disable_aad: ["ON", "OFF"] - cxx: ["17", "20"] - runs-on: macos-latest - steps: - - uses: actions/checkout@v4 - with: - repository: ${{ env.ql_repo }} - ref: ${{ env.ql_branch }} - path: QuantLib - - uses: actions/checkout@v4 - with: - repository: ${{ env.xad_repo }} - ref: ${{ env.xad_branch }} - path: xad - - uses: actions/checkout@v4 - with: - path: QuantLib-Risks-Cpp - - name: Setup - run: | - brew install boost - brew install ninja - brew install ccache - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.12 - with: - key: macos-${{ matrix.disable_aad }} - max-size: 650M - - name: Configure - run: | - cd QuantLib - mkdir build - cd build - cmake -G Ninja -DBOOST_ROOT=/usr \ - -DCMAKE_CXX_STANDARD=${{ matrix.cxx }} \ - 
-DQLRISKS_DISABLE_AAD=${{ matrix.disable_aad }} \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DQL_EXTERNAL_SUBDIRECTORIES="${{ github.workspace }}/xad;${{ github.workspace }}/QuantLib-Risks-Cpp" \ - -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ - -DQL_NULL_AS_FUNCTIONS=ON \ - -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install \ - .. - - name: Compile - run: | - cd QuantLib/build - cmake --build . - - name: Test QuantLib - run: | - cd QuantLib/build - ./test-suite/quantlib-test-suite --log_level=message - - name: Test QuantLib-Risks - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - cd QuantLib/build - ./QuantLib-Risks-Cpp/test-suite/quantlib-risks-test-suite --log_level=message - - name: Install - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - cd QuantLib/build - cmake --install . - - name: Test Install - if: ${{ matrix.disable_aad == 'OFF' }} - run: | - mkdir installtest - cp QuantLib-Risks-Cpp/Examples/AdjointSwap/AdjointSwapXAD.cpp installtest - cd installtest - echo "cmake_minimum_required(VERSION 3.15.2)" > CMakeLists.txt - echo "project(QlTest LANGUAGES CXX)" >> CMakeLists.txt - echo "find_package(QuantLib-Risks REQUIRED)" >> CMakeLists.txt - echo "add_executable(AdjointSwapXAD AdjointSwapXAD.cpp)" >> CMakeLists.txt - echo "target_link_libraries(AdjointSwapXAD PRIVATE QuantLib::QuantLib)" >> CMakeLists.txt - echo "target_compile_features(AdjointSwapXAD PUBLIC cxx_std_17)" >> CMakeLists.txt - mkdir build - cd build - cmake -G Ninja -DBOOST_ROOT=/usr \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_PREFIX_PATH=${{ github.workspace }}/install \ - .. - cmake --build . 
- ./AdjointSwapXAD + - name: Build Forge (C++ API) + if: matrix.capi == 'off' + run: | + cd forge + cmake -B build -S tools/packaging \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/../install + cmake --build build --config ${{ matrix.build_type }} + cmake --install build --config ${{ matrix.build_type }} - xad-linux-std-classes: - strategy: - fail-fast: false - runs-on: ubuntu-latest - container: ghcr.io/lballabio/quantlib-devenv:rolling - steps: - - uses: actions/checkout@v4 - with: - repository: ${{ env.ql_repo }} - ref: ${{ env.ql_branch }} - path: QuantLib - - uses: actions/checkout@v4 - with: - repository: ${{ env.xad_repo }} - ref: ${{ env.xad_branch }} - path: xad - - uses: actions/checkout@v4 - with: - path: QuantLib-Risks-Cpp - - name: Setup - run: | - apt-get update \ - && apt install -y ccache ninja-build \ - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.12 - with: - key: linux-std-classes - max-size: 650M - - name: Configure - run: | - cd QuantLib - mkdir build - cd build - cmake -G Ninja -DBOOST_ROOT=/usr \ - -DQL_USE_STD_CLASSES=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad;$(pwd)/../../QuantLib-Risks-Cpp" \ - -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ - -DQL_NULL_AS_FUNCTIONS=ON \ - .. - - name: Compile - run: | - cd QuantLib/build - cmake --build . 
- - name: Test QuantLib - run: | - cd QuantLib/build - ./test-suite/quantlib-test-suite --log_level=message - - name: Test QuantLib-Risks - run: | - cd QuantLib/build - ./QuantLib-Risks-Cpp/test-suite/quantlib-risks-test-suite --log_level=message + - name: Build Forge (C API) + if: matrix.capi == 'on' + run: | + cd forge + cmake -B build -S tools/capi \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DCMAKE_INSTALL_PREFIX=$(pwd)/../install + cmake --build build --config ${{ matrix.build_type }} + cmake --install build --config ${{ matrix.build_type }} + + - name: Configure QuantLib with XAD-JIT + Forge (C++ API) + if: matrix.capi == 'off' + run: | + cd QuantLib + mkdir build + cd build + cmake -G Ninja -DBOOST_ROOT=/usr \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DXAD_WARNINGS_PARANOID=OFF \ + -DCMAKE_PREFIX_PATH=$(pwd)/../../install \ + -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad-jit;$(pwd)/../../QuantLib-Risks-Cpp-Forge" \ + -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ + -DQL_NULL_AS_FUNCTIONS=ON \ + -DQL_BUILD_TEST_SUITE=OFF \ + -DQL_BUILD_EXAMPLES=OFF \ + -DQL_BUILD_BENCHMARK=OFF \ + -DQLRISKS_DISABLE_AAD=OFF \ + -DQLRISKS_BUILD_TEST_SUITE=ON \ + -DQLRISKS_ENABLE_FORGE_TESTS=ON \ + .. 
+ + - name: Configure QuantLib with XAD-JIT + Forge (C API) + if: matrix.capi == 'on' + run: | + cd QuantLib + mkdir build + cd build + cmake -G Ninja -DBOOST_ROOT=/usr \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DXAD_WARNINGS_PARANOID=OFF \ + -DCMAKE_PREFIX_PATH=$(pwd)/../../install \ + -DQL_EXTERNAL_SUBDIRECTORIES="$(pwd)/../../xad-jit;$(pwd)/../../QuantLib-Risks-Cpp-Forge" \ + -DQL_EXTRA_LINK_LIBRARIES=QuantLib-Risks \ + -DQL_NULL_AS_FUNCTIONS=ON \ + -DQL_BUILD_TEST_SUITE=OFF \ + -DQL_BUILD_EXAMPLES=OFF \ + -DQL_BUILD_BENCHMARK=OFF \ + -DQLRISKS_DISABLE_AAD=OFF \ + -DQLRISKS_USE_FORGE_CAPI=ON \ + -DQLRISKS_BUILD_TEST_SUITE=ON \ + -DQLRISKS_ENABLE_FORGE_TESTS=ON \ + .. + + - name: Build + run: | + cd QuantLib/build + cmake --build . + + - name: Test QuantLib-Risks + run: | + cd QuantLib/build + ./QuantLib-Risks-Cpp-Forge/test-suite/quantlib-risks-test-suite --log_level=message diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a6365cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,405 @@ +.idea +docs +interop +.deps-cache +forge-packaged-debug +forge-packaged-release +##### Windows +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows shortcuts +*.lnk + +##### Linux +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +##### MacOS +# General +.DS_Store +.AppleDouble +.LSOverride + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems 
+.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Generated files +/out/ +/build*/ + +# Log files +*.log + +##### Backup +*.bak +*.gho +*.ori +*.orig +*.tmp + +##### Vim +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + + +# Temporary +*~ +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* +*.bak + +##### SublimeText +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + + +##### VisualStudioCode +.vscode/* +#!.vscode/settings.json +#!.vscode/tasks.json +#!.vscode/launch.json +#!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# CMake +cmake-build-*/ + +##### Eclipse +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Uncomment this line if you wish to ignore the project description file. +# Typically, this file would be tracked if it contains build/dependency configurations: +.project + +# C++ objects and libs +*.slo +*.lo +*.o +*.a +*.la +*.lai +*.so +*.so.* +*.dll +*.dylib + +##### VisualStudio +##### VisualStudio +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Local History for Visual Studio +.localhistory/ + +##### CMake +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + +##### C++ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# C/C++ binary extension file +*.bin + +##### C +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +/site +.venv +CMakeUserPresets.json diff --git a/CMakeLists.txt b/CMakeLists.txt index f7e42c5..e4be659 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################## -# +# # # This file is part of QuantLib-Risks, an adaptor module to enable using XAD with # QuantLib. 
XAD is a fast and comprehensive C++ library for @@ -19,16 +19,142 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -# +# ############################################################################## option(QLRISKS_DISABLE_AAD "Disable using XAD for QuantLib's Real, allowing to run samples with double" OFF) +option(QLRISKS_USE_FORGE_CAPI "Use Forge C API instead of C++ API for binary compatibility" OFF) add_subdirectory(ql) if(MSVC) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") endif() add_subdirectory(Examples) + +############################################################################## +# QLRisks-Forge interface library (ForgeBackend adapter) +# NOTE: This must be defined BEFORE test-suite so tests can link to it +############################################################################## + +# Find Forge - either as a subdirectory target or as a pre-built package +# Pre-built package is PREFERRED because it isolates Forge's AVX2 compiler flags +# from QuantLib/XAD, preventing ODR violations and ABI incompatibilities. 
+message(STATUS "QLRisks-Forge: Looking for Forge...") + +set(FORGE_FOUND FALSE) + +if(QLRISKS_USE_FORGE_CAPI) + ########################################################################## + # C API Mode: Use forge_capi shared library for binary compatibility + ########################################################################## + message(STATUS "QLRisks-Forge: Using C API mode for binary compatibility") + + # Option 1: Check if forge_capi was added as subdirectory + if(TARGET forge_capi) + message(STATUS "QLRisks-Forge: Found forge_capi target (subdirectory mode)") + set(FORGE_FOUND TRUE) + set(FORGE_TARGET forge_capi) + # Get the capi directory for headers + get_target_property(FORGE_CAPI_SOURCE_DIR forge_capi SOURCE_DIR) + endif() + + # Option 2: Try find_package for pre-built ForgeCAPI + if(NOT FORGE_FOUND) + find_package(ForgeCAPI CONFIG QUIET) + if(ForgeCAPI_FOUND) + message(STATUS "QLRisks-Forge: Found ForgeCAPI package (pre-built mode)") + set(FORGE_FOUND TRUE) + set(FORGE_TARGET Forge::forge_capi) + endif() + endif() + + if(FORGE_FOUND) + add_library(qlrisks-forge INTERFACE) + add_library(QLRisks::forge ALIAS qlrisks-forge) + + target_include_directories(qlrisks-forge INTERFACE + $ + ) + + # Add C API header directory for subdirectory mode + if(FORGE_CAPI_SOURCE_DIR) + target_include_directories(qlrisks-forge INTERFACE + $ + ) + endif() + + target_compile_definitions(qlrisks-forge INTERFACE QLRISKS_USE_FORGE_CAPI=1) + + target_link_libraries(qlrisks-forge INTERFACE + XAD::xad + ${FORGE_TARGET} + ) + + message(STATUS "QLRisks-Forge: Configured with C API target ${FORGE_TARGET}") + else() + message(STATUS "QLRisks-Forge: forge_capi not found - ForgeBackend will not be available") + message(STATUS "QLRisks-Forge: To enable C API mode, add forge/tools/capi as subdirectory") + endif() + +else() + ########################################################################## + # C++ API Mode: Use original Forge library (requires matching compiler) + 
########################################################################## + + # Option 1: Check if forge was added via QL_EXTERNAL_SUBDIRECTORIES (legacy) + if(TARGET forge) + message(STATUS "QLRisks-Forge: Found forge target (subdirectory mode)") + set(FORGE_FOUND TRUE) + set(FORGE_TARGET forge) + endif() + + # Option 2: Try find_package for pre-built Forge (preferred) + if(NOT FORGE_FOUND) + find_package(Forge CONFIG QUIET) + if(Forge_FOUND) + message(STATUS "QLRisks-Forge: Found Forge package (pre-built mode) version ${Forge_VERSION}") + set(FORGE_FOUND TRUE) + set(FORGE_TARGET Forge::forge) + endif() + endif() + + if(FORGE_FOUND) + add_library(qlrisks-forge INTERFACE) + add_library(QLRisks::forge ALIAS qlrisks-forge) + + target_include_directories(qlrisks-forge INTERFACE + $ + ) + + # For subdirectory mode, add Forge source directories + if(TARGET forge) + get_target_property(FORGE_SOURCE_DIR forge SOURCE_DIR) + if(FORGE_SOURCE_DIR) + target_include_directories(qlrisks-forge INTERFACE + $ + $ + ) + endif() + endif() + + target_link_libraries(qlrisks-forge INTERFACE + XAD::xad + ${FORGE_TARGET} + ) + + message(STATUS "QLRisks-Forge: Configured with C++ API target ${FORGE_TARGET}") + else() + message(STATUS "QLRisks-Forge: Forge not found - ForgeBackend will not be available") + message(STATUS "QLRisks-Forge: To enable, either:") + message(STATUS " 1. Add forge to QL_EXTERNAL_SUBDIRECTORIES (not recommended)") + message(STATUS " 2. 
Set CMAKE_PREFIX_PATH to pre-built Forge installation (recommended)") + endif() +endif() + +############################################################################## +# Test suite (after QLRisks::forge is defined) +############################################################################## + if(NOT QLRISKS_DISABLE_AAD) # the test suite is not supporting double add_subdirectory(test-suite) diff --git a/src/qlrisks-forge/ForgeBackend.hpp b/src/qlrisks-forge/ForgeBackend.hpp new file mode 100644 index 0000000..4e4749e --- /dev/null +++ b/src/qlrisks-forge/ForgeBackend.hpp @@ -0,0 +1,278 @@ +#pragma once + +////////////////////////////////////////////////////////////////////////////// +// +// ForgeBackend - XAD JIT backend using Forge for native code generation +// +// This file is part of QuantLib-Risks-Cpp-Forge, integrating Forge JIT +// compilation with QuantLib and XAD. +// +// When QLRISKS_USE_FORGE_CAPI is defined, this header forwards to +// ForgeBackendCAPI for binary compatibility across compilers. +// +////////////////////////////////////////////////////////////////////////////// + +#ifdef QLRISKS_USE_FORGE_CAPI + +#include +namespace qlrisks { namespace forge { using ForgeBackend = ForgeBackendCAPI; } } + +#else + +#include +#include + +// Forge library (https://github.com/da-roth/forge) +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace qlrisks +{ +namespace forge +{ + +/** + * JIT Backend using Forge for native code generation. + * https://github.com/da-roth/forge + * + * Uses Forge's JIT compiler for fast forward pass execution. + * Falls back to JITGraphInterpreter for adjoint computation. + */ +class ForgeBackend : public xad::IJITBackend +{ + public: + // Constructor with optional graph optimizations (default: disabled) + explicit ForgeBackend(bool useGraphOptimizations = false) + : config_(useGraphOptimizations ? 
optimizedConfig() : defaultConfig()) + { + } + + ~ForgeBackend() override = default; + + ForgeBackend(ForgeBackend&&) noexcept = default; + ForgeBackend& operator=(ForgeBackend&&) noexcept = default; + + // No copy + ForgeBackend(const ForgeBackend&) = delete; + ForgeBackend& operator=(const ForgeBackend&) = delete; + + void compile(const xad::JITGraph& graph) override + { + // Skip recompilation if already compiled with same graph + if (kernel_ && lastNodeCount_ == graph.nodeCount()) + return; + + // Build ::forge::Graph from JITGraph + forgeGraph_ = ::forge::Graph(); + forgeGraph_.nodes.reserve(graph.nodeCount()); + + // First pass: create nodes without needsGradient + for (std::size_t i = 0; i < graph.nodeCount(); ++i) + { + ::forge::Node n; + n.op = static_cast<::forge::OpCode>(graph.opcodes[i]); + n.dst = static_cast(i); + n.a = graph.operand_a[i]; + n.b = graph.operand_b[i]; + n.c = graph.operand_c[i]; + n.imm = graph.immediates[i]; + n.isActive = (graph.flags[i] & xad::JITNodeFlags::IsActive) != 0; + n.isDead = (graph.flags[i] & xad::JITNodeFlags::IsDead) != 0; + n.needsGradient = false; // Will be set in propagation pass + forgeGraph_.nodes.push_back(n); + } + + // Copy constant pool and outputs + forgeGraph_.constPool = graph.const_pool; + forgeGraph_.outputs.assign(graph.output_ids.begin(), graph.output_ids.end()); + forgeGraph_.diff_inputs.assign(graph.input_ids.begin(), graph.input_ids.end()); + + // Second pass: propagate needsGradient from diff_inputs through the graph + // Mark all input nodes that are in diff_inputs as needing gradients + for (auto inputId : graph.input_ids) + { + if (inputId < forgeGraph_.nodes.size()) + forgeGraph_.nodes[inputId].needsGradient = true; + } + + // Forward propagation: if any operand needs gradient, result needs gradient + for (std::size_t i = 0; i < forgeGraph_.nodes.size(); ++i) + { + auto& node = forgeGraph_.nodes[i]; + if (node.isDead) continue; + + bool operandNeedsGrad = false; + if (node.a < 
forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.a].needsGradient; + if (node.b < forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.b].needsGradient; + if (node.c < forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.c].needsGradient; + + if (operandNeedsGrad) + node.needsGradient = true; + } + + // Extract input node IDs + inputIds_.clear(); + for (std::size_t i = 0; i < forgeGraph_.nodes.size(); ++i) + { + if (forgeGraph_.nodes[i].op == ::forge::OpCode::Input) + inputIds_.push_back(static_cast(i)); + } + outputIds_.assign(graph.output_ids.begin(), graph.output_ids.end()); + + // Compile to native code using the stored config + ::forge::ForgeEngine compiler(config_); + kernel_ = compiler.compile(forgeGraph_); + + if (!kernel_) + throw std::runtime_error("Forge kernel compilation failed"); + + // Create node value buffer + buffer_ = ::forge::NodeValueBufferFactory::create(forgeGraph_, *kernel_); + + if (!buffer_) + throw std::runtime_error("Forge buffer creation failed"); + + // Cache graph size to detect changes + lastNodeCount_ = graph.nodeCount(); + } + + void forward(const xad::JITGraph& graph, + const double* inputs, std::size_t numInputs, + double* outputs, std::size_t numOutputs) override + { + (void)graph; // unused, we use forgeGraph_ + + if (!kernel_ || !buffer_) + throw std::runtime_error("Backend not compiled"); + + if (numInputs != inputIds_.size()) + throw std::runtime_error("Input count mismatch"); + if (numOutputs != outputIds_.size()) + throw std::runtime_error("Output count mismatch"); + + // Set inputs + double inputLane[1]; + for (std::size_t i = 0; i < numInputs; ++i) { + inputLane[0] = inputs[i]; + buffer_->setLanes(inputIds_[i], inputLane); + } + + // Execute kernel (Forge always runs forward+backward, but we ignore gradients here) + buffer_->clearGradients(); + kernel_->execute(*buffer_); + + // Get outputs + double outputLane[1]; + for (std::size_t i = 0; i < numOutputs; ++i) { + 
buffer_->getLanes(outputIds_[i], outputLane);
+            outputs[i] = outputLane[0];
+        }
+    }
+
+    /**
+     * Run the compiled kernel once and return both the output values and the
+     * adjoints of the inputs.
+     *
+     * The kernel is executed with cleared gradients and, per the note below,
+     * Forge seeds the output gradients with 1.0 itself, so the caller's seeds
+     * cannot be handed to the kernel. Reverse mode is linear in the seed, so for
+     * a single-output graph the resulting input adjoints are scaled by
+     * outputAdjoints[0] afterwards. A unit seed therefore gives exactly the same
+     * numbers as before.
+     *
+     * NOTE(review): with more than one output the seeds are still ignored
+     * (every output is treated as seeded with 1.0) - confirm whether callers
+     * rely on per-output seeds.
+     *
+     * @param graph          unused; the graph captured by compile() is used
+     * @param inputs         numInputs input values
+     * @param numInputs      must equal the number of Input nodes found by compile()
+     * @param outputAdjoints numOutputs seed values; may be null (treated as 1.0)
+     * @param numOutputs     must equal the number of graph outputs
+     * @param outputs        receives numOutputs output values
+     * @param inputAdjoints  receives numInputs input adjoints
+     * @throws std::runtime_error if the backend is not compiled or a count mismatches
+     */
+    void forwardAndBackward(const xad::JITGraph& graph,
+                            const double* inputs, std::size_t numInputs,
+                            const double* outputAdjoints, std::size_t numOutputs,
+                            double* outputs,
+                            double* inputAdjoints) override
+    {
+        (void)graph;  // unused, we use forgeGraph_
+
+        if (!kernel_ || !buffer_)
+            throw std::runtime_error("Backend not compiled");
+
+        if (numInputs != inputIds_.size())
+            throw std::runtime_error("Input count mismatch");
+        if (numOutputs != outputIds_.size())
+            throw std::runtime_error("Output count mismatch");
+
+        // Forge seeds the single output with 1.0; apply the caller's seed afterwards.
+        const double seedScale =
+            (numOutputs == 1 && outputAdjoints != nullptr) ? outputAdjoints[0] : 1.0;
+
+        // Set inputs
+        double inputLane[1];
+        for (std::size_t i = 0; i < numInputs; ++i) {
+            inputLane[0] = inputs[i];
+            buffer_->setLanes(inputIds_[i], inputLane);
+        }
+
+        // Clear gradients - Forge will auto-seed output gradients to 1.0
+        buffer_->clearGradients();
+
+        // Execute kernel (forward + backward in one call)
+        kernel_->execute(*buffer_);
+
+        double* gradPtr = buffer_->getGradientsPtr();
+
+        // Get outputs
+        double outputLane[1];
+        for (std::size_t i = 0; i < numOutputs; ++i) {
+            buffer_->getLanes(outputIds_[i], outputLane);
+            outputs[i] = outputLane[0];
+        }
+
+        // Get input gradients, scaled by the caller's seed (1.0 leaves them unchanged)
+        for (std::size_t i = 0; i < numInputs; ++i) {
+            size_t bufferIdx = buffer_->getBufferIndex(inputIds_[i]);
+            inputAdjoints[i] = gradPtr[bufferIdx] * seedScale;
+        }
+    }
+
+    void reset() override
+    {
+        kernel_.reset();
+        buffer_.reset();
+        forgeGraph_ = ::forge::Graph();
+        inputIds_.clear();
+        outputIds_.clear();
+        lastNodeCount_ = 0;
+    }
+
+    // =========================================================================
+    // Accessors for graph reuse (e.g., by AVX backend)
+    // =========================================================================
+    const ::forge::Graph& forgeGraph() const { return forgeGraph_; }
+    const std::vector& inputIds() const { return inputIds_; }
+    const std::vector& outputIds() const { 
return outputIds_; } + + private: + static ::forge::CompilerConfig defaultConfig() + { + // Default: only stability cleaning, no graph optimizations + ::forge::CompilerConfig config; + config.instructionSet = ::forge::CompilerConfig::InstructionSet::SSE2_SCALAR; + config.enableOptimizations = false; + config.enableCSE = false; + config.enableAlgebraicSimplification = false; + config.enableStabilityCleaning = true; + return config; + } + + static ::forge::CompilerConfig optimizedConfig() + { + // Use Forge's Fast config with all graph optimizations enabled + auto config = ::forge::CompilerConfig::Fast(); + config.instructionSet = ::forge::CompilerConfig::InstructionSet::SSE2_SCALAR; + return config; + } + + ::forge::CompilerConfig config_; + ::forge::Graph forgeGraph_; + std::unique_ptr<::forge::StitchedKernel> kernel_; + std::unique_ptr<::forge::INodeValueBuffer> buffer_; + std::vector inputIds_; + std::vector outputIds_; + std::size_t lastNodeCount_ = 0; +}; + +} // namespace forge +} // namespace qlrisks + +#endif // QLRISKS_USE_FORGE_CAPI diff --git a/src/qlrisks-forge/ForgeBackendAVX.hpp b/src/qlrisks-forge/ForgeBackendAVX.hpp new file mode 100644 index 0000000..ccfa996 --- /dev/null +++ b/src/qlrisks-forge/ForgeBackendAVX.hpp @@ -0,0 +1,322 @@ +#pragma once + +////////////////////////////////////////////////////////////////////////////// +// +// ForgeBackendAVX - AVX2 backend for 4-path batching using Forge +// +// This file is part of QuantLib-Risks-Cpp-Forge, integrating Forge JIT +// compilation with QuantLib and XAD. +// +// This backend processes 4 Monte Carlo paths per kernel execution using +// AVX2 SIMD instructions (256-bit YMM registers = 4 doubles). +// +// USAGE: This backend is standalone with lane-based API for manual batching +// +// When QLRISKS_USE_FORGE_CAPI is defined, this header forwards to +// ForgeBackendAVX_CAPI for binary compatibility across compilers. 
+// +////////////////////////////////////////////////////////////////////////////// + +#ifdef QLRISKS_USE_FORGE_CAPI + +#include +namespace qlrisks { namespace forge { using ForgeBackendAVX = ForgeBackendAVX_CAPI; } } + +#else + +#include + +// Forge library (https://github.com/da-roth/forge) +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace qlrisks +{ +namespace forge +{ + +/** + * AVX2 Backend for Forge - standalone backend for 4-path SIMD execution. + * + * Takes an xad::JITGraph, converts it to forge::Graph, and compiles it + * with AVX2_PACKED instruction set for 4-path batching. + * + * Usage pattern: + * ForgeBackendAVX avxBackend; + * avxBackend.compile(jitGraph); + * + * for (pathBatch = 0; pathBatch < nPaths; pathBatch += 4) { + * // Set inputs for 4 paths + * for (size_t i = 0; i < numInputs; ++i) + * avxBackend.setInputLanes(i, &pathInputs[pathBatch][i]); + * + * // Run forward + backward, get both outputs and gradients + * double outputs[4], outputAdjoints[4] = {1.0, 1.0, 1.0, 1.0}; + * std::vector> inputGradients(numInputs); + * avxBackend.forwardAndBackward(outputAdjoints, outputs, inputGradients); + * } + */ +class ForgeBackendAVX +{ + public: + static constexpr int VECTOR_WIDTH = 4; // AVX2 processes 4 doubles + + explicit ForgeBackendAVX(bool useGraphOptimizations = false) + : config_(useGraphOptimizations ? optimizedConfig() : defaultConfig()) + { + } + + ~ForgeBackendAVX() = default; + + ForgeBackendAVX(ForgeBackendAVX&&) noexcept = default; + ForgeBackendAVX& operator=(ForgeBackendAVX&&) noexcept = default; + + // No copy + ForgeBackendAVX(const ForgeBackendAVX&) = delete; + ForgeBackendAVX& operator=(const ForgeBackendAVX&) = delete; + + /** + * Compile an xad::JITGraph with AVX2 instruction set. 
+ * + * @param jitGraph The XAD JIT graph to compile + */ + void compile(const xad::JITGraph& jitGraph) + { + // Convert xad::JITGraph to forge::Graph + forgeGraph_ = ::forge::Graph(); + forgeGraph_.nodes.reserve(jitGraph.nodeCount()); + + // First pass: create nodes without needsGradient + for (std::size_t i = 0; i < jitGraph.nodeCount(); ++i) + { + ::forge::Node n; + n.op = static_cast<::forge::OpCode>(jitGraph.opcodes[i]); + n.dst = static_cast(i); + n.a = jitGraph.operand_a[i]; + n.b = jitGraph.operand_b[i]; + n.c = jitGraph.operand_c[i]; + n.imm = jitGraph.immediates[i]; + n.isActive = (jitGraph.flags[i] & xad::JITNodeFlags::IsActive) != 0; + n.isDead = (jitGraph.flags[i] & xad::JITNodeFlags::IsDead) != 0; + n.needsGradient = false; // Will be set in propagation pass + forgeGraph_.nodes.push_back(n); + } + + // Copy constant pool and outputs + forgeGraph_.constPool = jitGraph.const_pool; + forgeGraph_.outputs.assign(jitGraph.output_ids.begin(), jitGraph.output_ids.end()); + forgeGraph_.diff_inputs.assign(jitGraph.input_ids.begin(), jitGraph.input_ids.end()); + + // Second pass: propagate needsGradient from diff_inputs through the graph + // Mark all input nodes that are in diff_inputs as needing gradients + for (auto inputId : jitGraph.input_ids) + { + if (inputId < forgeGraph_.nodes.size()) + forgeGraph_.nodes[inputId].needsGradient = true; + } + + // Forward propagation: if any operand needs gradient, result needs gradient + for (std::size_t i = 0; i < forgeGraph_.nodes.size(); ++i) + { + auto& node = forgeGraph_.nodes[i]; + if (node.isDead) continue; + + bool operandNeedsGrad = false; + if (node.a < forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.a].needsGradient; + if (node.b < forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.b].needsGradient; + if (node.c < forgeGraph_.nodes.size()) + operandNeedsGrad |= forgeGraph_.nodes[node.c].needsGradient; + + if (operandNeedsGrad) + node.needsGradient = true; + } + + // 
Extract input node IDs
+        inputIds_.clear();
+        for (std::size_t i = 0; i < forgeGraph_.nodes.size(); ++i)
+        {
+            if (forgeGraph_.nodes[i].op == ::forge::OpCode::Input)
+                inputIds_.push_back(static_cast(i));
+        }
+        outputIds_.assign(jitGraph.output_ids.begin(), jitGraph.output_ids.end());
+
+        // Drop any previous kernel/buffer before compiling, so that a failed
+        // compilation leaves the backend in the "not compiled" state instead of
+        // pairing stale native code with the id lists rebuilt just above.
+        buffer_.reset();
+        kernel_.reset();
+
+        // Compile to native code using AVX2 config
+        ::forge::ForgeEngine compiler(config_);
+        kernel_ = compiler.compile(forgeGraph_);
+
+        if (!kernel_)
+            throw std::runtime_error("Forge AVX2 kernel compilation failed");
+
+        // Create node value buffer (will be AVX2NodeValueBuffer due to AVX2_PACKED config)
+        buffer_ = ::forge::NodeValueBufferFactory::create(forgeGraph_, *kernel_);
+
+        if (!buffer_)
+            throw std::runtime_error("Forge AVX2 buffer creation failed");
+
+        // Pre-compute buffer indices for all inputs (for gradient retrieval)
+        inputBufferIndices_.clear();
+        inputBufferIndices_.reserve(inputIds_.size());
+        for (auto id : inputIds_)
+        {
+            inputBufferIndices_.push_back(buffer_->getBufferIndex(id));
+        }
+
+        // Pre-compute buffer indices for outputs
+        outputBufferIndices_.clear();
+        outputBufferIndices_.reserve(outputIds_.size());
+        for (auto id : outputIds_)
+        {
+            outputBufferIndices_.push_back(buffer_->getBufferIndex(id));
+        }
+    }
+
+    // =========================================================================
+    // Lane-based API for 4-path batching
+    // =========================================================================
+
+    /**
+     * Set 4 values for an input (one per SIMD lane = one per path)
+     * @param inputIndex Index into the input array (0 to numInputs-1)
+     * @param values Pointer to 4 doubles [path0, path1, path2, path3]
+     * @throws std::runtime_error if the index is out of range or no buffer exists
+     */
+    void setInputLanes(std::size_t inputIndex, const double* values)
+    {
+        if (inputIndex >= inputIds_.size())
+            throw std::runtime_error("Input index out of range");
+        // The id list can be populated while the buffer is missing (failed compile).
+        if (!buffer_)
+            throw std::runtime_error("Backend not compiled");
+        buffer_->setLanes(inputIds_[inputIndex], values);
+    }
+
+    /**
+     * Get 4 output values (one per SIMD lane = one per path)
+     * @param outputIndex Index into the output array (0 to 
numOutputs-1)
+     * @param output Pointer to receive 4 doubles [path0, path1, path2, path3]
+     * @throws std::runtime_error if the index is out of range or no buffer exists
+     */
+    void getOutputLanes(std::size_t outputIndex, double* output) const
+    {
+        if (outputIndex >= outputIds_.size())
+            throw std::runtime_error("Output index out of range");
+        // The id list can be populated while the buffer is missing (failed compile).
+        if (!buffer_)
+            throw std::runtime_error("Backend not compiled");
+        buffer_->getLanes(outputIds_[outputIndex], output);
+    }
+
+    /**
+     * Execute forward + backward in one call (efficient path for Forge)
+     * Forge always computes both forward and backward together.
+     *
+     * Only the first graph output is seeded and returned.
+     *
+     * Usage:
+     *   double inputs[4], outputs[4], outputAdjoints[4];
+     *   for each input: setInputLanes(idx, inputs);
+     *   forwardAndBackward(outputAdjoints, outputs, inputGradients);
+     *   (inputGradients is the vector of per-input 4-lane arrays, sized numInputs())
+     *
+     * @param outputAdjoints Pointer to array of 4 output adjoint values (seeds for backward);
+     *                       a null pointer is treated as a unit seed of 1.0 per lane, the
+     *                       seed that ForgeBackendAVX_CAPI always uses
+     * @param outputs Pointer to array to receive 4 output values (must not be null)
+     * @param inputGradients Vector of per-input arrays receiving the input gradients
+     *                       (numInputs entries of 4 values each)
+     * @throws std::runtime_error if the backend is not compiled, the graph has no
+     *         outputs, outputs is null, or inputGradients has the wrong size
+     */
+    void forwardAndBackward(const double* outputAdjoints, double* outputs,
+                            std::vector>& inputGradients)
+    {
+        if (!kernel_ || !buffer_)
+            throw std::runtime_error("Backend not compiled");
+
+        if (inputGradients.size() != inputIds_.size())
+            throw std::runtime_error("Input gradients array size mismatch");
+
+        // Index 0 of the output lists is used below; reject graphs without outputs.
+        if (outputIds_.empty() || outputBufferIndices_.empty())
+            throw std::runtime_error("Compiled graph has no outputs");
+
+        if (!outputs)
+            throw std::runtime_error("Output pointer is null");
+
+        const double unitSeed[VECTOR_WIDTH] = {1.0, 1.0, 1.0, 1.0};
+        const double* seed = outputAdjoints ? outputAdjoints : unitSeed;
+
+        // Clear and seed output adjoints (buffer index was cached by compile())
+        buffer_->clearGradients();
+        double* gradPtr = buffer_->getGradientsPtr();
+        const auto bufferIdx = outputBufferIndices_.front();
+        std::memcpy(&gradPtr[bufferIdx], seed, VECTOR_WIDTH * sizeof(double));
+
+        // Execute kernel (forward + backward together)
+        kernel_->execute(*buffer_);
+
+        // Get outputs
+        buffer_->getLanes(outputIds_[0], outputs);
+
+        // Get all input gradients in a single batched call (much more efficient)
+        // Reference: /docs/quantlib-forge benchmarks use single getGradientLanes call
+        const std::size_t numInputs = inputIds_.size();
+        std::vector allGradients(numInputs * VECTOR_WIDTH);
+        buffer_->getGradientLanes(inputBufferIndices_, allGradients.data());
+
+        // Distribute interleaved gradients to per-input arrays
+        // Layout: [input0_lane0..3, input1_lane0..3, ...]
+        for (std::size_t i = 0; i < numInputs; ++i) {
+            std::memcpy(inputGradients[i].data(), &allGradients[i * VECTOR_WIDTH],
+                        VECTOR_WIDTH * sizeof(double));
+        }
+    }
+
+    // =========================================================================
+    // Accessors
+    // =========================================================================
+
+    std::size_t numInputs() const { return inputIds_.size(); }
+    std::size_t numOutputs() const { return outputIds_.size(); }
+
+    const std::vector& inputIds() const { return inputIds_; }
+    const std::vector& outputIds() const { return outputIds_; }
+
+    // Access to underlying forge graph (for debugging/inspection)
+    const ::forge::Graph& forgeGraph() const { return forgeGraph_; }
+
+    // Access to buffer for advanced usage
+    ::forge::INodeValueBuffer* buffer() { return buffer_.get(); }
+    const ::forge::INodeValueBuffer* buffer() const { return buffer_.get(); }
+
+    void reset()
+    {
+        kernel_.reset();
+        buffer_.reset();
+        forgeGraph_ = ::forge::Graph();
+        inputIds_.clear();
+        outputIds_.clear();
+        inputBufferIndices_.clear();
+        outputBufferIndices_.clear();
+    }
+
+  private:
+    static ::forge::CompilerConfig defaultConfig()
+    {
+        ::forge::CompilerConfig config;
+        // Use AVX2 packed mode - 4 doubles per operation
+        config.instructionSet = ::forge::CompilerConfig::InstructionSet::AVX2_PACKED;
+        config.enableOptimizations = false;
+        config.enableCSE = false;
+        config.enableAlgebraicSimplification = false;
+        config.enableStabilityCleaning = true;
+        return config;
+    }
+
+    static ::forge::CompilerConfig optimizedConfig()
+    {
+        auto config = ::forge::CompilerConfig::Fast();
+        // Use AVX2 packed mode - 4 doubles per operation
+        config.instructionSet = ::forge::CompilerConfig::InstructionSet::AVX2_PACKED;
+        return config;
+    }
+
+    ::forge::CompilerConfig config_;
+    ::forge::Graph forgeGraph_;
+    std::unique_ptr<::forge::StitchedKernel> kernel_;
+    
std::unique_ptr<::forge::INodeValueBuffer> buffer_; + std::vector inputIds_; + std::vector outputIds_; + std::vector inputBufferIndices_; // Pre-computed for gradient access + std::vector outputBufferIndices_; // Pre-computed for output access +}; + +} // namespace forge +} // namespace qlrisks + +#endif // QLRISKS_USE_FORGE_CAPI diff --git a/src/qlrisks-forge/ForgeBackendAVX_CAPI.hpp b/src/qlrisks-forge/ForgeBackendAVX_CAPI.hpp new file mode 100644 index 0000000..90376fc --- /dev/null +++ b/src/qlrisks-forge/ForgeBackendAVX_CAPI.hpp @@ -0,0 +1,287 @@ +#pragma once + +////////////////////////////////////////////////////////////////////////////// +// +// ForgeBackendAVX_CAPI - AVX2 backend using Forge C API +// +// This file is part of QuantLib-Risks-Cpp-Forge, integrating Forge JIT +// compilation with QuantLib and XAD. +// +// This backend processes 4 Monte Carlo paths per kernel execution using +// AVX2 SIMD instructions (256-bit YMM registers = 4 doubles). +// +// Uses the stable C API for binary compatibility across compilers. +// +////////////////////////////////////////////////////////////////////////////// + +#include + +// Forge C API - stable ABI +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace qlrisks +{ +namespace forge +{ + +/** + * AVX2 Backend using Forge C API - standalone backend for 4-path SIMD execution. + * + * Uses the stable C API for binary compatibility with precompiled Forge packages. 
+ * + * Usage pattern: + * ForgeBackendAVX_CAPI avxBackend; + * avxBackend.compile(jitGraph); + * + * for (pathBatch = 0; pathBatch < nPaths; pathBatch += 4) { + * for (size_t i = 0; i < numInputs; ++i) + * avxBackend.setInputLanes(i, &pathInputs[pathBatch][i]); + * + * double outputs[4], outputAdjoints[4] = {1.0, 1.0, 1.0, 1.0}; + * std::vector> inputGradients(numInputs); + * avxBackend.forwardAndBackward(outputAdjoints, outputs, inputGradients); + * } + */ +class ForgeBackendAVX_CAPI +{ + public: + static constexpr int VECTOR_WIDTH = 4; // AVX2 processes 4 doubles + + explicit ForgeBackendAVX_CAPI(bool useGraphOptimizations = false) + : useOptimizations_(useGraphOptimizations) + , graph_(nullptr) + , config_(nullptr) + , kernel_(nullptr) + , buffer_(nullptr) + { + } + + ~ForgeBackendAVX_CAPI() + { + cleanup(); + } + + ForgeBackendAVX_CAPI(ForgeBackendAVX_CAPI&& other) noexcept + : useOptimizations_(other.useOptimizations_) + , graph_(other.graph_) + , config_(other.config_) + , kernel_(other.kernel_) + , buffer_(other.buffer_) + , inputIds_(std::move(other.inputIds_)) + , outputIds_(std::move(other.outputIds_)) + { + other.graph_ = nullptr; + other.config_ = nullptr; + other.kernel_ = nullptr; + other.buffer_ = nullptr; + } + + ForgeBackendAVX_CAPI& operator=(ForgeBackendAVX_CAPI&& other) noexcept + { + if (this != &other) + { + cleanup(); + useOptimizations_ = other.useOptimizations_; + graph_ = other.graph_; + config_ = other.config_; + kernel_ = other.kernel_; + buffer_ = other.buffer_; + inputIds_ = std::move(other.inputIds_); + outputIds_ = std::move(other.outputIds_); + other.graph_ = nullptr; + other.config_ = nullptr; + other.kernel_ = nullptr; + other.buffer_ = nullptr; + } + return *this; + } + + // No copy + ForgeBackendAVX_CAPI(const ForgeBackendAVX_CAPI&) = delete; + ForgeBackendAVX_CAPI& operator=(const ForgeBackendAVX_CAPI&) = delete; + + /** + * Compile an xad::JITGraph with AVX2 instruction set. 
+ */ + void compile(const xad::JITGraph& jitGraph) + { + cleanup(); + + // Create graph + graph_ = forge_graph_create(); + if (!graph_) + throw std::runtime_error(std::string("Forge graph creation failed: ") + forge_get_last_error()); + + // Build graph from JITGraph + inputIds_.clear(); + for (std::size_t i = 0; i < jitGraph.nodeCount(); ++i) + { + ForgeOpCode op = static_cast(jitGraph.opcodes[i]); + uint32_t a = jitGraph.operand_a[i]; + uint32_t b = jitGraph.operand_b[i]; + uint32_t c = jitGraph.operand_c[i]; + double imm = jitGraph.immediates[i]; + int isActive = (jitGraph.flags[i] & xad::JITNodeFlags::IsActive) != 0 ? 1 : 0; + int needsGrad = 0; + + uint32_t nodeId = forge_graph_add_node(graph_, op, a, b, c, imm, isActive, needsGrad); + if (nodeId == UINT32_MAX) + throw std::runtime_error(std::string("Forge add_node failed: ") + forge_get_last_error()); + + if (op == FORGE_OP_INPUT) + inputIds_.push_back(nodeId); + } + + // Mark outputs + outputIds_.assign(jitGraph.output_ids.begin(), jitGraph.output_ids.end()); + for (auto outputId : outputIds_) + { + ForgeError err = forge_graph_mark_output(graph_, outputId); + if (err != FORGE_SUCCESS) + throw std::runtime_error(std::string("Forge mark_output failed: ") + forge_get_last_error()); + } + + // Mark diff inputs + for (auto inputId : jitGraph.input_ids) + { + ForgeError err = forge_graph_mark_diff_input(graph_, inputId); + if (err != FORGE_SUCCESS) + throw std::runtime_error(std::string("Forge mark_diff_input failed: ") + forge_get_last_error()); + } + + // Create config with AVX2 + config_ = useOptimizations_ ? 
forge_config_create_fast() : forge_config_create_default();
+        if (!config_)
+            throw std::runtime_error("Forge config creation failed");
+
+        forge_config_set_instruction_set(config_, FORGE_INSTRUCTION_SET_AVX2_PACKED);
+
+        // Compile
+        kernel_ = forge_compile(graph_, config_);
+        if (!kernel_)
+            throw std::runtime_error(std::string("Forge AVX2 compilation failed: ") + forge_get_last_error());
+
+        // Create buffer
+        buffer_ = forge_buffer_create(graph_, kernel_);
+        if (!buffer_)
+            throw std::runtime_error(std::string("Forge AVX2 buffer creation failed: ") + forge_get_last_error());
+    }
+
+    // =========================================================================
+    // Lane-based API for 4-path batching
+    // =========================================================================
+
+    /**
+     * Set 4 values for an input (one per SIMD lane = one per path)
+     *
+     * @param inputIndex Index into the input array (0 to numInputs-1)
+     * @param values     Pointer to 4 doubles, one per lane
+     * @throws std::runtime_error if the index is out of range or no buffer exists
+     */
+    void setInputLanes(std::size_t inputIndex, const double* values)
+    {
+        if (inputIndex >= inputIds_.size())
+            throw std::runtime_error("Input index out of range");
+        // After a failed compile() the id list is populated but the handle is
+        // null; never hand a null buffer handle to the C API.
+        if (!buffer_)
+            throw std::runtime_error("Backend not compiled");
+        forge_buffer_set_lanes(buffer_, inputIds_[inputIndex], values);
+    }
+
+    /**
+     * Get 4 output values (one per SIMD lane = one per path)
+     *
+     * @param outputIndex Index into the output array (0 to numOutputs-1)
+     * @param output      Pointer to receive 4 doubles, one per lane
+     * @throws std::runtime_error if the index is out of range or no buffer exists
+     */
+    void getOutputLanes(std::size_t outputIndex, double* output) const
+    {
+        if (outputIndex >= outputIds_.size())
+            throw std::runtime_error("Output index out of range");
+        // See setInputLanes(): guard against a null buffer handle.
+        if (!buffer_)
+            throw std::runtime_error("Backend not compiled");
+        forge_buffer_get_lanes(buffer_, outputIds_[outputIndex], output);
+    }
+
+    /**
+     * Execute forward + backward in one call
+     *
+     * Only the first graph output is returned. The outputAdjoints argument is
+     * accepted for signature compatibility but ignored: gradients are cleared
+     * and the kernel is run without writing any seed (see the note below).
+     *
+     * @param outputAdjoints Ignored
+     * @param outputs        Pointer to receive the 4 lanes of the first output (must not be null)
+     * @param inputGradients Vector of per-input arrays receiving the input gradients;
+     *                       its size must equal numInputs()
+     * @throws std::runtime_error if the backend is not compiled, the graph has no
+     *         outputs, outputs is null, inputGradients has the wrong size, or
+     *         kernel execution fails
+     */
+    void forwardAndBackward(const double* outputAdjoints, double* outputs,
+                            std::vector>& inputGradients)
+    {
+        if (!kernel_ || !buffer_)
+            throw std::runtime_error("Backend not compiled");
+
+        if (inputGradients.size() != inputIds_.size())
+            throw std::runtime_error("Input gradients array size mismatch");
+
+        // outputIds_[0] is read below; reject graphs without outputs up front.
+        if (outputIds_.empty())
+            throw std::runtime_error("Compiled graph has no outputs");
+
+        if (!outputs)
+            throw std::runtime_error("Output pointer is null");
+
+        (void)outputAdjoints;  // Forge auto-seeds to 1.0
+
+        // Clear gradients and execute
+        forge_buffer_clear_gradients(buffer_);
+        ForgeError err = forge_execute(kernel_, buffer_);
+        if (err != FORGE_SUCCESS)
+            throw std::runtime_error(std::string("Forge execution failed: ") + forge_get_last_error());
+
+        // Get outputs (first output only for now)
+        forge_buffer_get_lanes(buffer_, outputIds_[0], outputs);
+
+        // Get input gradients
+        for (std::size_t i = 0; i < inputIds_.size(); ++i)
+        {
+            forge_buffer_get_gradient_lanes(buffer_, &inputIds_[i], 1, inputGradients[i].data());
+        }
+    }
+
+    // =========================================================================
+    // Accessors
+    // =========================================================================
+
+    std::size_t numInputs() const { return inputIds_.size(); }
+    std::size_t numOutputs() const { return outputIds_.size(); }
+
+    const std::vector& inputIds() const { return inputIds_; }
+    const std::vector& outputIds() const { return outputIds_; }
+
+    int getVectorWidth() const
+    {
+        return buffer_ ? forge_buffer_get_vector_width(buffer_) : 0;
+    }
+
+    /**
+     * Get buffer index for a node ID (for compatibility with C++ API)
+     */
+    std::size_t getBufferIndex(uint32_t nodeId) const
+    {
+        return buffer_ ? 
forge_buffer_get_index(buffer_, nodeId) : SIZE_MAX; + } + + /** + * Returns this for buffer() compatibility (C++ API returns buffer pointer) + */ + ForgeBackendAVX_CAPI* buffer() { return this; } + const ForgeBackendAVX_CAPI* buffer() const { return this; } + + void reset() + { + cleanup(); + inputIds_.clear(); + outputIds_.clear(); + } + + private: + void cleanup() + { + if (buffer_) { forge_buffer_destroy(buffer_); buffer_ = nullptr; } + if (kernel_) { forge_kernel_destroy(kernel_); kernel_ = nullptr; } + if (config_) { forge_config_destroy(config_); config_ = nullptr; } + if (graph_) { forge_graph_destroy(graph_); graph_ = nullptr; } + } + + bool useOptimizations_; + ForgeGraphHandle graph_; + ForgeConfigHandle config_; + ForgeKernelHandle kernel_; + ForgeBufferHandle buffer_; + std::vector inputIds_; + std::vector outputIds_; +}; + +} // namespace forge +} // namespace qlrisks diff --git a/src/qlrisks-forge/ForgeBackendCAPI.hpp b/src/qlrisks-forge/ForgeBackendCAPI.hpp new file mode 100644 index 0000000..064d618 --- /dev/null +++ b/src/qlrisks-forge/ForgeBackendCAPI.hpp @@ -0,0 +1,272 @@ +#pragma once + +////////////////////////////////////////////////////////////////////////////// +// +// ForgeBackendCAPI - XAD JIT backend using Forge C API +// +// This file is part of QuantLib-Risks-Cpp-Forge, integrating Forge JIT +// compilation with QuantLib and XAD. +// +// Uses the stable C API for binary compatibility across compilers. +// +////////////////////////////////////////////////////////////////////////////// + +#include +#include + +// Forge C API - stable ABI +#include + +#include +#include +#include +#include + +namespace qlrisks +{ +namespace forge +{ + +/** + * JIT Backend using Forge C API for native code generation. + * + * This version uses the stable C API instead of the C++ API, + * enabling binary compatibility with precompiled Forge packages + * built with different compilers. 
+ */
+class ForgeBackendCAPI : public xad::IJITBackend
+{
+  public:
+    /// Creates an uncompiled backend; graph optimizations are off by default.
+    explicit ForgeBackendCAPI(bool useGraphOptimizations = false)
+        : useOptimizations_(useGraphOptimizations)
+        , graph_(nullptr)
+        , config_(nullptr)
+        , kernel_(nullptr)
+        , buffer_(nullptr)
+    {
+    }
+
+    ~ForgeBackendCAPI() override
+    {
+        cleanup();
+    }
+
+    /// Takes over the Forge handles of `other`, leaving it uncompiled.
+    ForgeBackendCAPI(ForgeBackendCAPI&& other) noexcept
+        : useOptimizations_(other.useOptimizations_)
+        , graph_(other.graph_)
+        , config_(other.config_)
+        , kernel_(other.kernel_)
+        , buffer_(other.buffer_)
+        , inputIds_(std::move(other.inputIds_))
+        , outputIds_(std::move(other.outputIds_))
+        , lastNodeCount_(other.lastNodeCount_)
+    {
+        other.detach();
+    }
+
+    /// Releases the current handles, then takes over those of `other`.
+    ForgeBackendCAPI& operator=(ForgeBackendCAPI&& other) noexcept
+    {
+        if (this != &other)
+        {
+            cleanup();
+            useOptimizations_ = other.useOptimizations_;
+            graph_ = other.graph_;
+            config_ = other.config_;
+            kernel_ = other.kernel_;
+            buffer_ = other.buffer_;
+            inputIds_ = std::move(other.inputIds_);
+            outputIds_ = std::move(other.outputIds_);
+            lastNodeCount_ = other.lastNodeCount_;
+            other.detach();
+        }
+        return *this;
+    }
+
+    // No copy
+    ForgeBackendCAPI(const ForgeBackendCAPI&) = delete;
+    ForgeBackendCAPI& operator=(const ForgeBackendCAPI&) = delete;
+
+    /// Builds the Forge graph from `jitGraph`, compiles it and allocates the buffer.
+    /// @throws std::runtime_error when a Forge C API call reports failure.
+    void compile(const xad::JITGraph& jitGraph) override
+    {
+        // Skip recompilation if already compiled with same graph.
+        // NOTE(review): only the node count is compared, so a different graph
+        // of equal size reuses the old kernel - call reset() to force a rebuild.
+        if (kernel_ && lastNodeCount_ == jitGraph.nodeCount())
+            return;
+
+        // Clean up previous compilation
+        cleanup();
+
+        // Create graph
+        graph_ = forge_graph_create();
+        if (!graph_)
+            throw std::runtime_error(std::string("Forge graph creation failed: ") + forge_get_last_error());
+
+        // Build graph from JITGraph
+        inputIds_.clear();
+        for (std::size_t i = 0; i < jitGraph.nodeCount(); ++i)
+        {
+            ForgeOpCode op = static_cast<ForgeOpCode>(jitGraph.opcodes[i]);
+            uint32_t a = jitGraph.operand_a[i];
+            uint32_t b = jitGraph.operand_b[i];
+            uint32_t c = jitGraph.operand_c[i];
+            double imm = jitGraph.immediates[i];
+            int isActive = (jitGraph.flags[i] & xad::JITNodeFlags::IsActive) != 0 ? 1 : 0;
+            int needsGrad = 0;  // Will be set via mark_diff_input
+
+            uint32_t nodeId = forge_graph_add_node(graph_, op, a, b, c, imm, isActive, needsGrad);
+            if (nodeId == UINT32_MAX)
+                throw std::runtime_error(std::string("Forge add_node failed: ") + forge_get_last_error());
+
+            // Track input nodes
+            if (op == FORGE_OP_INPUT)
+                inputIds_.push_back(nodeId);
+        }
+
+        // Mark outputs
+        outputIds_.assign(jitGraph.output_ids.begin(), jitGraph.output_ids.end());
+        for (auto outputId : outputIds_)
+        {
+            ForgeError err = forge_graph_mark_output(graph_, outputId);
+            if (err != FORGE_SUCCESS)
+                throw std::runtime_error(std::string("Forge mark_output failed: ") + forge_get_last_error());
+        }
+
+        // Mark diff inputs
+        for (auto inputId : jitGraph.input_ids)
+        {
+            ForgeError err = forge_graph_mark_diff_input(graph_, inputId);
+            if (err != FORGE_SUCCESS)
+                throw std::runtime_error(std::string("Forge mark_diff_input failed: ") + forge_get_last_error());
+        }
+
+        // Create config
+        config_ = useOptimizations_ ? forge_config_create_fast() : forge_config_create_default();
+        if (!config_)
+            throw std::runtime_error(std::string("Forge config creation failed: ") + forge_get_last_error());
+
+        // Set instruction set to SSE2 scalar
+        forge_config_set_instruction_set(config_, FORGE_INSTRUCTION_SET_SSE2_SCALAR);
+
+        // Compile
+        kernel_ = forge_compile(graph_, config_);
+        if (!kernel_)
+            throw std::runtime_error(std::string("Forge compilation failed: ") + forge_get_last_error());
+
+        // Create buffer
+        buffer_ = forge_buffer_create(graph_, kernel_);
+        if (!buffer_)
+            throw std::runtime_error(std::string("Forge buffer creation failed: ") + forge_get_last_error());
+
+        lastNodeCount_ = jitGraph.nodeCount();
+    }
+
+    /// Runs the compiled kernel on `inputs` and writes the results to `outputs`.
+    /// @throws std::runtime_error if not compiled, on count mismatch, or on Forge failure.
+    void forward(const xad::JITGraph& graph,
+                 const double* inputs, std::size_t numInputs,
+                 double* outputs, std::size_t numOutputs) override
+    {
+        (void)graph;
+        runKernel(inputs, numInputs, outputs, numOutputs);
+    }
+
+    /// Evaluates the kernel and additionally copies the gradient stored for
+    /// each input into `inputAdjoints`; throws like forward().
+    void forwardAndBackward(const xad::JITGraph& graph,
+                            const double* inputs, std::size_t numInputs,
+                            const double* outputAdjoints, std::size_t numOutputs,
+                            double* outputs,
+                            double* inputAdjoints) override
+    {
+        (void)graph;
+        (void)outputAdjoints;  // Forge auto-seeds to 1.0
+
+        runKernel(inputs, numInputs, outputs, numOutputs);
+
+        // Get input gradients
+        for (std::size_t i = 0; i < numInputs; ++i)
+        {
+            forge_buffer_get_gradient(buffer_, inputIds_[i], &inputAdjoints[i]);
+        }
+    }
+
+    void reset() override
+    {
+        cleanup();
+        inputIds_.clear();
+        outputIds_.clear();
+        lastNodeCount_ = 0;
+    }
+
+  private:
+    /// Shared body of forward() and forwardAndBackward(): validates the call,
+    /// uploads the inputs, clears gradients, executes, downloads the outputs.
+    void runKernel(const double* inputs, std::size_t numInputs,
+                   double* outputs, std::size_t numOutputs)
+    {
+        if (!kernel_ || !buffer_)
+            throw std::runtime_error("Backend not compiled");
+
+        if (numInputs != inputIds_.size())
+            throw std::runtime_error("Input count mismatch");
+        if (numOutputs != outputIds_.size())
+            throw std::runtime_error("Output count mismatch");
+
+        // Set inputs
+        for (std::size_t i = 0; i < numInputs; ++i)
+        {
+            forge_buffer_set_value(buffer_, inputIds_[i], inputs[i]);
+        }
+
+        // Clear gradients and execute
+        forge_buffer_clear_gradients(buffer_);
+        ForgeError err = forge_execute(kernel_, buffer_);
+        if (err != FORGE_SUCCESS)
+            throw std::runtime_error(std::string("Forge execution failed: ") + forge_get_last_error());
+
+        // Get outputs
+        for (std::size_t i = 0; i < numOutputs; ++i)
+        {
+            forge_buffer_get_value(buffer_, outputIds_[i], &outputs[i]);
+        }
+    }
+
+    /// Drops ownership after a move (nothing is destroyed) and resets to the uncompiled state.
+    void detach() noexcept
+    {
+        graph_ = nullptr;
+        config_ = nullptr;
+        kernel_ = nullptr;
+        buffer_ = nullptr;
+        inputIds_.clear();
+        outputIds_.clear();
+        lastNodeCount_ = 0;
+    }
+
+    /// Destroys every live Forge handle (buffer, kernel, config, graph) and nulls it.
+    void cleanup()
+    {
+        if (buffer_) { forge_buffer_destroy(buffer_); buffer_ = nullptr; }
+        if (kernel_) { forge_kernel_destroy(kernel_); kernel_ = nullptr; }
+        if (config_) { forge_config_destroy(config_); config_ = nullptr; }
+        if (graph_) { forge_graph_destroy(graph_); graph_ = nullptr; }
+    }
+
+    bool useOptimizations_;
+    ForgeGraphHandle graph_;
+    ForgeConfigHandle config_;
+    ForgeKernelHandle kernel_;
+    ForgeBufferHandle buffer_;
+    std::vector<uint32_t> inputIds_;
+    std::vector<uint32_t> outputIds_;
+    std::size_t lastNodeCount_ = 0;
+};
+
+} // namespace forge
+} // namespace qlrisks
diff --git a/src/qlrisks-forge/ForgeBackends.hpp b/src/qlrisks-forge/ForgeBackends.hpp
new file mode 100644
index 0000000..260f530
--- /dev/null
+++ b/src/qlrisks-forge/ForgeBackends.hpp
@@ -0,0 +1,54 @@
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// ForgeBackends.hpp - Backend type selection based on API mode
+//
+// This header provides unified type aliases that automatically select
+// the
appropriate backend implementation:
+//
+//   QLRISKS_USE_FORGE_CAPI=1: Uses C API backends (binary compatible)
+//   QLRISKS_USE_FORGE_CAPI=0 or undefined: Uses C++ API backends (requires matching compiler)
+//
+// Usage:
+//   #include <qlrisks-forge/ForgeBackends.hpp>
+//   auto backend = std::make_unique<qlrisks::forge::ScalarBackend>();
+//   qlrisks::forge::AVXBackend avxBackend;
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#if defined(QLRISKS_USE_FORGE_CAPI) && (QLRISKS_USE_FORGE_CAPI != 0) // test the value: =0 must select the C++ API
+
+// C API mode - binary compatible across compilers
+#include <qlrisks-forge/ForgeBackendCAPI.hpp>
+#include <qlrisks-forge/ForgeBackendAVX_CAPI.hpp>
+
+namespace qlrisks
+{
+namespace forge
+{
+
+using ScalarBackend = ForgeBackendCAPI;
+using AVXBackend = ForgeBackendAVX_CAPI;
+
+} // namespace forge
+} // namespace qlrisks
+
+#else
+
+// C++ API mode - requires matching compiler/ABI
+#include <qlrisks-forge/ForgeBackend.hpp>
+#include <qlrisks-forge/ForgeBackendAVX.hpp>
+
+namespace qlrisks
+{
+namespace forge
+{
+
+using ScalarBackend = ForgeBackend;
+using AVXBackend = ForgeBackendAVX;
+
+} // namespace forge
+} // namespace qlrisks
+
+#endif
diff --git a/test-suite/CMakeLists.txt b/test-suite/CMakeLists.txt
index c96f8ae..f2c1f73 100644
--- a/test-suite/CMakeLists.txt
+++ b/test-suite/CMakeLists.txt
@@ -1,3 +1,7 @@
+option(QLRISKS_BUILD_TEST_SUITE "Build the QuantLib-Risks test suite" OFF)
+option(QLRISKS_ENABLE_FORGE_TESTS "Enable Forge JIT tests (requires Forge)" OFF)
+option(QLRISKS_BUILD_BENCHMARK "Build the QuantLib-Risks benchmark suite" OFF)
+
 set(QLRISKS_TEST_SOURCES
     americanoption_xad.cpp
     barrieroption_xad.cpp
@@ -9,14 +13,33 @@ set(QLRISKS_TEST_SOURCES
     forwardrateagreement_xad.cpp
     hestonmodel_xad.cpp
     swap_xad.cpp
-
+    swaption_jit_pipeline_xad.cpp
+
     utilities_xad.cpp
     quantlibtestsuite_xad.cpp
 )
 
+# Benchmark sources (separate from test suite)
+set(QLRISKS_BENCHMARK_SOURCES
+    quantlibrisks_benchmark.cpp
+    swaption_benchmark.cpp
+    utilities_xad.cpp
+)
+
+# Forge JIT tests - only added when explicitly enabled AND Forge is available
+# These tests require Forge's JIT compiler for AAD
+set(QLRISKS_FORGE_TESTS_ENABLED FALSE)
+if(QLRISKS_ENABLE_FORGE_TESTS AND TARGET QLRisks::forge)
+    
message(STATUS "QLRisks test-suite: Adding Forge JIT tests") + list(APPEND QLRISKS_TEST_SOURCES jit_xad.cpp forgebackend_xad.cpp) + set(QLRISKS_FORGE_TESTS_ENABLED TRUE) +elseif(QLRISKS_ENABLE_FORGE_TESTS) + message(WARNING "QLRisks test-suite: QLRISKS_ENABLE_FORGE_TESTS=ON but QLRisks::forge not available") +endif() + set(QLRISKS_TEST_HEADERS utilities_xad.hpp) -if(QL_BUILD_TEST_SUITE) +if(QL_BUILD_TEST_SUITE OR QLRISKS_BUILD_TEST_SUITE) add_executable(QuantLib-Risks_test_suite ${QLRISKS_TEST_SOURCES} ${QLRISKS_TEST_HEADERS}) set_target_properties(QuantLib-Risks_test_suite PROPERTIES OUTPUT_NAME "quantlib-risks-test-suite") if (NOT Boost_USE_STATIC_LIBS) @@ -25,8 +48,43 @@ if(QL_BUILD_TEST_SUITE) target_link_libraries(QuantLib-Risks_test_suite PRIVATE ql_library ${QL_THREAD_LIBRARIES}) + + # ONLY link to Forge if Forge tests are actually enabled + # This is important because linking Forge brings in AVX2-compiled code + # which can cause ODR violations if not properly isolated + if(QLRISKS_FORGE_TESTS_ENABLED) + message(STATUS "QLRisks test-suite: Linking QLRisks::forge (Forge tests enabled)") + target_link_libraries(QuantLib-Risks_test_suite PRIVATE QLRisks::forge) + target_compile_definitions(QuantLib-Risks_test_suite PRIVATE QLRISKS_HAS_FORGE=1) + else() + message(STATUS "QLRisks test-suite: NOT linking Forge (Forge tests disabled)") + endif() + if (QL_INSTALL_TEST_SUITE) install(TARGETS QuantLib-Risks_test_suite RUNTIME DESTINATION ${QL_INSTALL_BINDIR}) endif() add_test(NAME QuantLib-Risks_test_suite COMMAND QuantLib-Risks_test_suite --log_level=message) +endif() + +# Benchmark suite - separate executable for CI benchmarks +# Requires Forge to be available +if(QLRISKS_BUILD_BENCHMARK AND TARGET QLRisks::forge) + message(STATUS "QLRisks: Building benchmark suite") + add_executable(QuantLib-Risks_benchmark ${QLRISKS_BENCHMARK_SOURCES} ${QLRISKS_TEST_HEADERS}) + set_target_properties(QuantLib-Risks_benchmark PROPERTIES OUTPUT_NAME "quantlib-risks-benchmark") + if 
(NOT Boost_USE_STATIC_LIBS) + target_compile_definitions(QuantLib-Risks_benchmark PRIVATE BOOST_ALL_DYN_LINK) + endif() + target_link_libraries(QuantLib-Risks_benchmark PRIVATE + ql_library + QLRisks::forge + ${QL_THREAD_LIBRARIES}) + target_compile_definitions(QuantLib-Risks_benchmark PRIVATE QLRISKS_HAS_FORGE=1) + + if (QL_INSTALL_TEST_SUITE) + install(TARGETS QuantLib-Risks_benchmark RUNTIME DESTINATION ${QL_INSTALL_BINDIR}) + endif() + add_test(NAME QuantLib-Risks_benchmark COMMAND QuantLib-Risks_benchmark --log_level=message) +elseif(QLRISKS_BUILD_BENCHMARK) + message(WARNING "QLRisks: QLRISKS_BUILD_BENCHMARK=ON but QLRisks::forge not available") endif() \ No newline at end of file diff --git a/test-suite/forgebackend_xad.cpp b/test-suite/forgebackend_xad.cpp new file mode 100644 index 0000000..7d6d261 --- /dev/null +++ b/test-suite/forgebackend_xad.cpp @@ -0,0 +1,298 @@ +/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* + This file is part of QuantLib-Risks, a C++ library for AAD-enabled + quantitative finance using QuantLib and XAD. + + This file tests the Forge JIT backend integration with XAD. + Unlike jit_xad.cpp which uses the C++ interpreter, this tests + the actual native code generation via Forge. 
+*/ + +#include "toplevelfixture.hpp" +#include "utilities_xad.hpp" +#include +#include +#include +#include +#include +#include + +using namespace QuantLib; +using namespace boost::unit_test_framework; + +BOOST_FIXTURE_TEST_SUITE(QuantLibRisksForgeTests, TopLevelFixture) + +BOOST_AUTO_TEST_SUITE(ForgeBackendTests) + +namespace { + +// f1: Simple linear function +// f(x) = x * 3 + 2, f'(x) = 3 +template +T f1(const T& x) +{ + return x * 3.0 + 2.0; +} + +// f2: Function with supported math operations +// Uses: sin, cos, exp, log, sqrt, abs +template +T f2(const T& x) +{ + using std::sin; using std::cos; using std::exp; using std::log; + using std::sqrt; using std::abs; + + T result = sin(x) + cos(x) * 2.0; + result = result + exp(x / 10.0) + log(x + 5.0); + result = result + sqrt(x + 1.0); + result = result + abs(x - 1.0) + x * x; + result = result + 1.0 / (x + 2.0); + return result; +} + +// f3ABool: Branching with ABool::If for trackable branches +xad::AD f3ABool(const xad::AD& x) +{ + return xad::less(x, 2.0).If(2.0 * x, 10.0 * x); +} + +double f3ABool_double(double x) +{ + return (x < 2.0) ? 
2.0 * x : 10.0 * x; +} + +} // anonymous namespace + +BOOST_AUTO_TEST_CASE(testForgeBackendLinearFunction) +{ + BOOST_TEST_MESSAGE("Testing ForgeBackend with linear function..."); + std::cout << "\n=== ForgeBackend Linear Function Test: f(x) = 3x + 2 ===" << std::endl; + + std::vector inputs = {2.0, 0.5, -1.0}; + + // Compute with Tape + std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f1(x); + tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with ForgeBackend (native JIT) + std::vector forgeOutputs, forgeDerivatives; + { + auto jit = xad::JITCompiler::withBackend(); + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f1(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + forgeOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + forgeDerivatives.push_back(derivative(x)); + } + } + + // Compare results + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f1(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", forge=" << forgeOutputs[i] << " (deriv=" << forgeDerivatives[i] << ")" + << std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, forgeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], forgeDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_CASE(testForgeBackendMathFunctions) +{ + BOOST_TEST_MESSAGE("Testing ForgeBackend with math functions..."); + std::cout << "\n=== ForgeBackend Math Functions Test: sin, cos, exp, log, sqrt, abs ===" << 
std::endl; + + std::vector inputs = {2.0, 0.5}; + + // Compute with Tape + std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f2(x); + tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with ForgeBackend + std::vector forgeOutputs, forgeDerivatives; + { + auto jit = xad::JITCompiler::withBackend(); + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f2(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + forgeOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + forgeDerivatives.push_back(derivative(x)); + } + } + + // Compare results + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f2(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", forge=" << forgeOutputs[i] << " (deriv=" << forgeDerivatives[i] << ")" + << std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, forgeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], forgeDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_CASE(testForgeBackendABoolBranching) +{ + BOOST_TEST_MESSAGE("Testing ForgeBackend with ABool::If branching..."); + std::cout << "\n=== ForgeBackend ABool::If Test: if(x<2) 2*x else 10*x ===" << std::endl; + + std::vector inputs = {1.0, 3.0}; + + // Compute with Tape + std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f3ABool(x); + 
tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with ForgeBackend + std::vector forgeOutputs, forgeDerivatives; + { + auto jit = xad::JITCompiler::withBackend(); + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f3ABool(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + forgeOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + forgeDerivatives.push_back(derivative(x)); + } + } + + // Compare results - ABool::If should track both branches + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f3ABool_double(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", forge=" << forgeOutputs[i] << " (deriv=" << forgeDerivatives[i] << ")" + << " - MATCH!" 
<< std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, forgeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], forgeDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_CASE(testForgeBackendBasicInstantiation) +{ + BOOST_TEST_MESSAGE("Testing ForgeBackend basic instantiation..."); + std::cout << "\n=== ForgeBackend Basic Test: f(x) = x^2 + 3x ===" << std::endl; + + auto jit = xad::JITCompiler::withBackend(); + + xad::AD x(2.0); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = x * x + 3.0 * x; // f(x) = x^2 + 3x, f'(x) = 2x + 3 + jit.registerOutput(y); + jit.compile(); // Compile before forward + + double output; + jit.forward(&output, 1); + std::cout << " f(2) = " << output << " (expected: 10)" << std::endl; + BOOST_CHECK_CLOSE(10.0, output, 1e-10); // f(2) = 4 + 6 = 10 + + value(x) = 5.0; + jit.forward(&output, 1); + std::cout << " f(5) = " << output << " (expected: 40)" << std::endl; + BOOST_CHECK_CLOSE(40.0, output, 1e-10); // f(5) = 25 + 15 = 40 + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + std::cout << " f'(5) = " << derivative(x) << " (expected: 13)" << std::endl; + BOOST_CHECK_CLOSE(13.0, derivative(x), 1e-10); // f'(5) = 10 + 3 = 13 +} + +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test-suite/jit_xad.cpp b/test-suite/jit_xad.cpp new file mode 100644 index 0000000..c30ba80 --- /dev/null +++ b/test-suite/jit_xad.cpp @@ -0,0 +1,345 @@ +/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* + This file is part of QuantLib-Risks, a C++ library for AAD-enabled + quantitative finance using QuantLib and XAD. + + QuantLib-Risks is free software: you can redistribute it and/or modify it + under the terms of the QuantLib-Risks license. You should have received a + copy of the license along with this program; if not, please visit + . + + This file tests the XAD JIT compilation infrastructure. 
+*/ + +#include "toplevelfixture.hpp" +#include "utilities_xad.hpp" +#include +#include +#include +#include +#include + +using namespace QuantLib; +using namespace boost::unit_test_framework; + +BOOST_FIXTURE_TEST_SUITE(QuantLibRisksJITTests, TopLevelFixture) + +BOOST_AUTO_TEST_SUITE(JITTests) + +namespace { + +// f1: Simple linear function +// f(x) = x * 3 + 2, f'(x) = 3 +template +T f1(const T& x) +{ + return x * 3.0 + 2.0; +} + +// f2: Function with supported math operations +// Uses: sin, cos, exp, log, sqrt, abs +template +T f2(const T& x) +{ + using std::sin; using std::cos; using std::exp; using std::log; + using std::sqrt; using std::abs; + + T result = sin(x) + cos(x) * 2.0; + result = result + exp(x / 10.0) + log(x + 5.0); + result = result + sqrt(x + 1.0); + result = result + abs(x - 1.0) + x * x; + result = result + 1.0 / (x + 2.0); + return result; +} + +// Helper to get value for both double and AD types +inline double getValue(double x) { return x; } +template +double getValue(const T& x) { return value(x); } + +// f3: Branching function (regular if/else - JIT records one branch) +template +T f3(const T& x) +{ + if (getValue(x) < 2.0) + return 2.0 * x; + else + return 10.0 * x; +} + +// f3ABool: Branching with ABool::If (JIT records both branches) +xad::AD f3ABool(const xad::AD& x) +{ + return xad::less(x, 2.0).If(2.0 * x, 10.0 * x); +} + +double f3ABool_double(double x) +{ + return (x < 2.0) ? 
2.0 * x : 10.0 * x; +} + +} // anonymous namespace + +BOOST_AUTO_TEST_CASE(testJITLinearFunction) +{ + BOOST_TEST_MESSAGE("Testing JIT compilation with linear function..."); + std::cout << "\n=== JIT Linear Function Test: f(x) = 3x + 2 ===" << std::endl; + + std::vector inputs = {2.0, 0.5, -1.0}; + + // Compute with Tape + std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f1(x); + tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with JIT (record once, reuse) + std::vector jitOutputs, jitDerivatives; + { + xad::JITCompiler jit; + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f1(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + jitOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + jitDerivatives.push_back(derivative(x)); + } + } + + // Compare results + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f1(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", jit=" << jitOutputs[i] << " (deriv=" << jitDerivatives[i] << ")" + << std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, jitOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], jitDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_CASE(testJITMathFunctions) +{ + BOOST_TEST_MESSAGE("Testing JIT compilation with math functions..."); + std::cout << "\n=== JIT Math Functions Test: sin, cos, exp, log, sqrt, abs ===" << std::endl; + + std::vector inputs = {2.0, 0.5}; + + // Compute with Tape 
+ std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f2(x); + tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with JIT + std::vector jitOutputs, jitDerivatives; + { + xad::JITCompiler jit; + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f2(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + jitOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + jitDerivatives.push_back(derivative(x)); + } + } + + // Compare results + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f2(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", jit=" << jitOutputs[i] << " (deriv=" << jitDerivatives[i] << ")" + << std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, jitOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], jitDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_CASE(testJITBranchingRegularIf) +{ + BOOST_TEST_MESSAGE("Testing JIT with regular if/else (graph reuse behavior)..."); + std::cout << "\n=== JIT Branching Test: Regular if/else (demonstrates graph reuse) ===" << std::endl; + std::cout << " Formula: if (x < 2) 2*x else 10*x" << std::endl; + + // With regular if/else, JIT records the branch taken during recording + // and will use that branch for all subsequent evaluations + std::vector inputs = {1.0, 3.0}; + + // Compute with Tape (re-records each time, follows actual branch) + std::vector tapeOutputs; + { + xad::Tape tape; + for (double input : 
inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f3(x); + tape.registerOutput(y); + tapeOutputs.push_back(value(y)); + tape.clearAll(); + } + } + + // Compute with JIT (records branch at x=1, uses it for all) + std::vector jitOutputs; + { + xad::JITCompiler jit; + + xad::AD x(inputs[0]); // x=1 -> takes first branch (2*x) + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f3(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + jitOutputs.push_back(output); + } + } + + std::cout << " x=1: tape=" << tapeOutputs[0] << " (correct: 2*1=2), jit=" << jitOutputs[0] << std::endl; + std::cout << " x=3: tape=" << tapeOutputs[1] << " (correct: 10*3=30), jit=" << jitOutputs[1] << " (uses recorded 2*x branch!)" << std::endl; + + // Tape follows actual branches: f(1)=2, f(3)=30 + BOOST_CHECK_CLOSE(tapeOutputs[0], 2.0, 1e-10); // 2*1 = 2 + BOOST_CHECK_CLOSE(tapeOutputs[1], 30.0, 1e-10); // 10*3 = 30 + + // JIT recorded first branch (2*x), uses it for both: f(1)=2, f(3)=6 + BOOST_CHECK_CLOSE(jitOutputs[0], 2.0, 1e-10); // 2*1 = 2 + BOOST_CHECK_CLOSE(jitOutputs[1], 6.0, 1e-10); // 2*3 = 6 (uses recorded branch!) 
+} + +BOOST_AUTO_TEST_CASE(testJITBranchingABool) +{ + BOOST_TEST_MESSAGE("Testing JIT with ABool::If (tracks both branches)..."); + std::cout << "\n=== JIT Branching Test: ABool::If (tracks both branches correctly) ===" << std::endl; + std::cout << " Formula: ABool::If(x < 2, 2*x, 10*x)" << std::endl; + + // With ABool::If, JIT records both branches and selects at runtime + std::vector inputs = {1.0, 3.0}; + + // Compute with Tape + std::vector tapeOutputs, tapeDerivatives; + { + xad::Tape tape; + for (double input : inputs) + { + xad::AD x(input); + tape.registerInput(x); + tape.newRecording(); + xad::AD y = f3ABool(x); + tape.registerOutput(y); + derivative(y) = 1.0; + tape.computeAdjoints(); + tapeOutputs.push_back(value(y)); + tapeDerivatives.push_back(derivative(x)); + tape.clearAll(); + } + } + + // Compute with JIT + std::vector jitOutputs, jitDerivatives; + { + xad::JITCompiler jit; + + xad::AD x(inputs[0]); + jit.registerInput(x); + jit.newRecording(); + xad::AD y = f3ABool(x); + jit.registerOutput(y); + jit.compile(); // Compile before forward + + for (double input : inputs) + { + value(x) = input; + double output; + jit.forward(&output, 1); + jitOutputs.push_back(output); + + jit.clearDerivatives(); + derivative(y) = 1.0; + jit.computeAdjoints(); + jitDerivatives.push_back(derivative(x)); + } + } + + // Both should match: ABool::If allows JIT to track both branches + for (std::size_t i = 0; i < inputs.size(); ++i) + { + double expected = f3ABool_double(inputs[i]); + std::cout << " x=" << inputs[i] + << ": tape=" << tapeOutputs[i] << " (deriv=" << tapeDerivatives[i] << ")" + << ", jit=" << jitOutputs[i] << " (deriv=" << jitDerivatives[i] << ")" + << " - MATCH!" 
<< std::endl; + BOOST_CHECK_CLOSE(expected, tapeOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(expected, jitOutputs[i], 1e-10); + BOOST_CHECK_CLOSE(tapeDerivatives[i], jitDerivatives[i], 1e-10); + } +} + +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test-suite/quantlibrisks_benchmark.cpp b/test-suite/quantlibrisks_benchmark.cpp new file mode 100644 index 0000000..44b220b --- /dev/null +++ b/test-suite/quantlibrisks_benchmark.cpp @@ -0,0 +1,20 @@ +/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* + Copyright (C) 2025 Xcelerit Computing Limited + + This file is part of QuantLib-Risks / XAD / Forge integration. + + Main entry point for the QuantLib-Risks benchmark suite. +*/ + +#define BOOST_TEST_MODULE QuantLibRisksBenchmark + +#include + +/* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks, + for example) also #define _MSC_VER +*/ +#if !defined(BOOST_ALL_NO_LIB) && defined(BOOST_MSVC) +# include +#endif diff --git a/test-suite/swaption_benchmark.cpp b/test-suite/swaption_benchmark.cpp new file mode 100644 index 0000000..99e731e --- /dev/null +++ b/test-suite/swaption_benchmark.cpp @@ -0,0 +1,2517 @@ +/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* + Copyright (C) 2025 Xcelerit Computing Limited + + This file is part of QuantLib-Risks / XAD / Forge integration. + + Swaption JIT Benchmark - Performance comparison of AD approaches: + - XAD Tape: Traditional tape-based reverse-mode AD + - JIT (Forge): Just-In-Time compiled native code for AD + - JIT-AVX: JIT with AVX2 SIMD vectorization (4 paths per instruction) + + Benchmarks: + 1. Simple Swaption (1Y into 1Y) - basic scaling test + 2. 
/// Build a Euribor6M index whose forwarding curve is a ZeroCurve constructed
/// from the given dates and zero rates (Actual360 day counter).
///
/// Side effect: sets the global Settings evaluation date to 4 September 2005,
/// adjusted on the index's fixing calendar.
///
/// @param dates  Curve node dates. Taken by value because element 0 is
///               overwritten below; the vector must therefore be non-empty.
/// @param rates  Zero rates passed to ZeroCurve together with `dates`.
/// @return       The index, with its term-structure handle linked to the curve.
///
/// NOTE(review): template arguments appear to have been stripped from this
/// listing (ext::shared_ptr, std::vector, RelinkableHandle) - confirm the
/// exact types against the original file.
ext::shared_ptr makeIndex(std::vector dates,
                          const std::vector& rates) {
    DayCounter dayCounter = Actual360();
    // The handle is created empty so the index can be constructed first; it is
    // linked to the actual curve once the dates have been fixed up.
    RelinkableHandle termStructure;
    ext::shared_ptr index(new Euribor6M(termStructure));

    Date todaysDate = index->fixingCalendar().adjust(Date(4, September, 2005));
    Settings::instance().evaluationDate() = todaysDate;

    // Replace the first curve date with today advanced by the index's fixing
    // days on the fixing calendar.
    dates[0] = index->fixingCalendar().advance(todaysDate,
                                               index->fixingDays(), Days);

    termStructure.linkTo(ext::shared_ptr(
        new ZeroCurve(dates, rates, dayCounter)));

    return index;
}
/// Chain-rule contraction for hybrid AD: computes result = jacobian^T * derivatives,
/// i.e. result[j] = sum over i of derivatives[i] * jacobian[i * numInputs + j].
///
/// @param jacobian          Flat row-major matrix, numIntermediates rows by numInputs columns
/// @param derivatives       numIntermediates weights (dOutput/dIntermediate)
/// @param result            numInputs outputs (dOutput/dInput); overwritten, not accumulated into
/// @param numIntermediates  Row count of the jacobian
/// @param numInputs         Column count of the jacobian
///
/// The three pointers are declared __restrict, so the buffers must not overlap.
inline void applyChainRule(const double* __restrict jacobian,
                           const double* __restrict derivatives,
                           double* __restrict result,
                           std::size_t numIntermediates,
                           std::size_t numInputs)
{
    // Clear the accumulator so stale values in `result` never leak through.
    std::size_t col = 0;
    while (col < numInputs)
        result[col++] = 0.0;

    // Walk the jacobian one row at a time; each row contributes its entries
    // scaled by the matching derivative. Rows are visited in ascending order
    // so the floating-point summation order is fixed.
    const double* row = jacobian;
    for (std::size_t r = 0; r != numIntermediates; ++r, row += numInputs)
    {
        const double weight = derivatives[r];
        for (std::size_t c = 0; c != numInputs; ++c)
            result[c] += weight * row[c];
    }
}
IMPLEMENTATIONS:\n"; + std::cout << " QL = QuantLib's MultiPathGenerator (full path storage)\n"; + std::cout << " RR = Direct process->evolve() calls (JIT-compatible)\n"; + std::cout << std::endl; + std::cout << " QuantLib's MultiPathGenerator stores complete paths internally,\n"; + std::cout << " which records everything on the AD tape. The 'direct evolve'\n"; + std::cout << " approach calls the diffusion step-by-step with explicit inputs,\n"; + std::cout << " enabling JIT compilation of the inner loop.\n"; + std::cout << std::endl; + std::cout << " APPROACHES TESTED:\n"; + std::cout << " XAD(QL) - XAD tape + QuantLib MultiPathGenerator\n"; + std::cout << " XAD(RR) - XAD tape + direct evolve (baseline for JIT)\n"; + std::cout << " JIT(RR) - Forge JIT-compiled kernel\n"; + std::cout << " JIT-Intrp - XAD JIT graph interpreter (no native code)\n"; + std::cout << " JIT-AVX - Forge JIT + AVX2 SIMD (4 paths/instruction)\n"; + std::cout << std::endl; + std::cout << " INSTRUMENT:\n"; + std::cout << " European payer swaption: 1Y option into 1Y swap\n"; + std::cout << " Model: LIBOR Market Model (LMM) with lognormal forwards\n"; + std::cout << " Sensitivities: dPrice/dMarketQuotes (9 inputs)\n"; + std::cout << std::endl; + + using Clock = std::chrono::high_resolution_clock; + using Duration = std::chrono::duration; + + // Market data setup (same as Stage 4) + Calendar calendar = TARGET(); + Date todaysDate(4, September, 2005); + Settings::instance().evaluationDate() = todaysDate; + Integer fixingDays = 2; + Date settlementDate = calendar.adjust(calendar.advance(todaysDate, fixingDays, Days)); + DayCounter dayCounter = Actual360(); + + Size numDeposits = 4; + Size numSwaps = 5; + std::vector depoTenors = {1 * Days, 1 * Months, 3 * Months, 6 * Months}; + std::vector swapTenors = {1 * Years, 2 * Years, 3 * Years, 4 * Years, 5 * Years}; + + std::vector depoRates_val = {0.0350, 0.0365, 0.0380, 0.0400}; + std::vector swapRates_val = {0.0420, 0.0480, 0.0520, 0.0550, 0.0575}; + + 
Size numMarketQuotes = numDeposits + numSwaps; + + // LMM parameters + Size size = 10; + Size i_opt = 2; + Size j_opt = 2; + Size steps = 8; + + // Build base curve and process setup + std::vector baseZeroRates = {0.0350, 0.0575}; + std::vector baseDates = {settlementDate, settlementDate + 6 * Years}; + auto baseIndex = makeIndex(baseDates, baseZeroRates); + + ext::shared_ptr baseProcess( + new LiborForwardModelProcess(size, baseIndex)); + ext::shared_ptr baseCorrModel( + new LmExponentialCorrelationModel(size, 0.5)); + ext::shared_ptr baseVolaModel( + new LmLinearExponentialVolatilityModel(baseProcess->fixingTimes(), + 0.291, 1.483, 0.116, 0.00001)); + baseProcess->setCovarParam(ext::shared_ptr( + new LfmCovarianceProxy(baseVolaModel, baseCorrModel))); + + // Grid and timing setup + std::vector