# Workflow file: .github/workflows/pr_tests_gpu.yml (repository NotSoBot/nsb.api.diffusers, branch main)

name: Fast GPU Tests on PR

# Triggers: pull requests that target main and touch one of the listed
# model/pipeline/LoRA loading modules, the shared test mixins, or any example
# script. The workflow can also be started manually (workflow_dispatch).
on:
  pull_request:
    branches:
      - main
    paths:
      - "src/diffusers/models/modeling_utils.py"
      - "src/diffusers/models/model_loading_utils.py"
      - "src/diffusers/pipelines/pipeline_utils.py"
      # NOTE(review): pipeline_loading_utils.py presumably lives next to
      # pipeline_utils.py under pipelines/; the top-level entry below is the
      # original filter and likely never matches. Confirm, then drop it.
      - "src/diffusers/pipelines/pipeline_loading_utils.py"
      - "src/diffusers/pipeline_loading_utils.py"
      - "src/diffusers/loaders/lora_base.py"
      - "src/diffusers/loaders/lora_pipeline.py"
      - "src/diffusers/loaders/peft.py"
      - "tests/pipelines/test_pipelines_common.py"
      - "tests/models/test_modeling_common.py"
      - "examples/**/*.py"
  workflow_dispatch:

# At most one live run per workflow and PR head branch: a newer run cancels the
# one still in progress. When head_ref is empty the run id is used instead, so
# such runs each get their own group.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.head_ref || github.run_id }}"

# Environment variables shared by every job in this workflow. All values are
# quoted so they reach the processes exactly as written, whatever scalar type a
# YAML parser would otherwise infer.
env:
  # A bare `yes` is a boolean under YAML 1.1 rules; the quotes keep it a string.
  DIFFUSERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  HF_XET_HIGH_PERFORMANCE: "1"
  PYTEST_TIMEOUT: "600"
  # Set a high cutoff so that only always-test pipelines run.
  PIPELINE_USAGE_CUTOFF: "1000000000"

jobs:
  # Lint gate: installs the `quality` extra and runs `make quality`. The other
  # jobs in this workflow depend on it, directly or transitively.
  check_code_quality:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.8"
      # The extras spec is quoted so the shell cannot expand `[quality]` as a
      # glob character class; this matches the quoting used by the GPU jobs.
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install ".[quality]"
      - name: Check quality
        run: make quality
      # Only runs after a failed step: writes a remediation hint to the job
      # summary page.
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> "$GITHUB_STEP_SUMMARY"

  # Consistency gate: runs the repository's copy/dummy/support-list checks and
  # the dependency-table check once the code-quality job has passed.
  check_repository_consistency:
    needs: check_code_quality
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.8"
      # The extras spec is quoted so the shell cannot expand `[quality]` as a
      # glob character class; this matches the quoting used by the GPU jobs.
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install ".[quality]"
      - name: Check repo consistency
        run: |
          python utils/check_copies.py
          python utils/check_dummies.py
          python utils/check_support_list.py
          make deps_table_check_updated
      # Only runs after a failed step: writes a remediation hint to the job
      # summary page.
      - name: Check if failure
        if: ${{ failure() }}
        run: |
          echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> "$GITHUB_STEP_SUMMARY"

  # Computes the list of pipeline test modules to run. The value printed by
  # utils/fetch_torch_cuda_pipeline_test_matrix.py is exposed as the job output
  # `pipeline_test_matrix`, which torch_pipelines_cuda_tests parses with
  # fromJson to build its matrix.
  setup_torch_cuda_pipeline_matrix:
    needs: [check_code_quality, check_repository_consistency]
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
      - name: Environment
        run: |
          python utils/print_env.py
      # "$matrix" is quoted everywhere: unquoted, the shell would word-split the
      # value and could glob-expand its square brackets before echo sees it.
      - name: Fetch Pipeline Matrix
        id: fetch_pipeline_matrix
        run: |
          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
          echo "$matrix"
          echo "pipeline_test_matrix=$matrix" >> "$GITHUB_OUTPUT"
      # Uploaded even when an earlier step failed, so the reports directory can
      # be inspected afterwards.
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: test-pipelines.json
          path: reports

  # Runs the pipeline tests on a GPU runner, one matrix entry per module listed
  # in the `pipeline_test_matrix` output of setup_torch_cuda_pipeline_matrix.
  torch_pipelines_cuda_tests:
    name: Torch Pipelines CUDA Tests
    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      fail-fast: false
      max-parallel: 8
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: |
          python utils/print_env.py
      # Stores the -k pattern printed by utils/extract_tests_from_mixin.py in a
      # temp file and publishes that file's path as a step output.
      - name: Extract tests
        id: extract_tests
        run: |
          pattern=$(python utils/extract_tests_from_mixin.py --type pipeline)
          echo "$pattern" > /tmp/test_pattern.txt
          echo "pattern_file=/tmp/test_pattern.txt" >> $GITHUB_OUTPUT

      # Builds the pytest -k expression once. ip_adapters only gets the
      # Flax/Onnx exclusion; every other module also gets the extracted pattern
      # when it is non-empty. An empty pattern is skipped because it would leave
      # a dangling "and" in the expression, the same guard torch_cuda_tests uses.
      - name: PyTorch CUDA checkpoint tests on Ubuntu
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          filter="not Flax and not Onnx"
          if [ "${{ matrix.module }}" != "ip_adapters" ]; then
              pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
              if [ -n "$pattern" ]; then
                  filter="$filter and $pattern"
              fi
          fi
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -k "$filter" \
          --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
          tests/pipelines/${{ matrix.module }}

      # Only after a failed step: print the stats and short failure reports.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      # Always upload the reports directory, one artifact per module.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports

  # GPU test shards for the non-pipeline suites: one matrix entry per
  # sub-directory of tests/ (models, schedulers, lora, others), at most four
  # running at once. All run steps in this job use bash.
  torch_cuda_tests:
    name: Torch CUDA Tests
    needs:
      - check_code_quality
      - check_repository_consistency
    strategy:
      fail-fast: false
      max-parallel: 4
      matrix:
        module:
          - models
          - schedulers
          - lora
          - others
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2

      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install peft@git+https://github.com/huggingface/peft.git
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: |
          python utils/print_env.py

      # Writes the -k pattern for this shard to a temp file and exposes the
      # file's path as the `pattern_file` step output.
      - name: Extract tests
        id: extract_tests
        run: |
          pattern=$(python utils/extract_tests_from_mixin.py --type ${{ matrix.module }})
          echo "$pattern" > /tmp/test_pattern.txt
          echo "pattern_file=/tmp/test_pattern.txt" >> $GITHUB_OUTPUT

      # With an empty pattern only the Flax/Onnx exclusion is applied; otherwise
      # the extracted pattern is and-ed onto it.
      - name: Run PyTorch CUDA tests
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ':16:8'
        run: |
          pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
          if [ -z "$pattern" ]; then
            pytest -n 1  --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx" tests/${{ matrix.module }} \
            --make-reports=tests_torch_cuda_${{ matrix.module }}
          else
            pytest -n 1  --max-worker-restart=0 --dist=loadfile -k "not Flax and not Onnx and $pattern" tests/${{ matrix.module }} \
            --make-reports=tests_torch_cuda_${{ matrix.module }}
          fi

      # Only after a failed step: dump the stats and short failure reports.
      - name: Failure short reports
        if: failure()
        run: |
          cat reports/tests_torch_cuda_${{ matrix.module }}_stats.txt
          cat reports/tests_torch_cuda_${{ matrix.module }}_failures_short.txt

      # Runs regardless of earlier results; one artifact per shard.
      - name: Test suite reports artifacts
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: torch_cuda_test_reports_${{ matrix.module }}
          path: reports

  # Runs the tests under examples/ on a GPU runner once both gate jobs pass.
  run_examples_tests:
    name: Examples PyTorch CUDA tests on Ubuntu
    needs: [check_code_quality, check_repository_consistency]
    runs-on:
      group: aws-g4dn-2xlarge

    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v6
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      # Pins transformers, then installs the package in editable mode with the
      # quality and training extras. This is the only install the tests need.
      - name: Install dependencies
        run: |
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
          uv pip install -e ".[quality,training]"

      - name: Environment
        run: |
          python utils/print_env.py

      # The training extra is already installed (editable) by the
      # "Install dependencies" step, so the package is not reinstalled here;
      # a second, non-editable install would only replace that one.
      - name: Run example tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/

      # Only after a failed step: print the stats and short failure reports.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/examples_torch_cuda_stats.txt
          cat reports/examples_torch_cuda_failures_short.txt

      # Always upload the reports directory.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v6
        with:
          name: examples_test_reports
          path: reports