Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
06e1e61
update
zhulinJulia24 Mar 23, 2026
28be81d
update
zhulinJulia24 Mar 23, 2026
f9d3559
updaste
zhulinJulia24 Mar 23, 2026
5ae3ec3
update
zhulinJulia24 Mar 23, 2026
af03b7c
update
zhulinJulia24 Mar 23, 2026
5ffd66e
update
zhulinJulia24 Mar 27, 2026
80d4104
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Mar 30, 2026
dd66f35
update
zhulinJulia24 Mar 30, 2026
7abb292
update
zhulinJulia24 Mar 30, 2026
31cb598
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Mar 31, 2026
9299555
update
zhulinJulia24 Mar 31, 2026
f0e3648
merge main
zhulinJulia24 Mar 31, 2026
d46fff0
update
zhulinJulia24 Apr 1, 2026
199bbd0
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 4, 2026
0259853
Update daily_ete_test.yml
zhulinJulia24 Apr 4, 2026
81bf59e
Update daily_ete_test.yml
zhulinJulia24 Apr 4, 2026
475b94f
Update artifact name to use Python 3.12
zhulinJulia24 Apr 4, 2026
fdd6b66
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 7, 2026
7d674a0
update
zhulinJulia24 Apr 9, 2026
64083a2
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 9, 2026
d747351
update main
zhulinJulia24 Apr 9, 2026
2b3ac36
update
zhulinJulia24 Apr 9, 2026
42d8f50
update
zhulinJulia24 Apr 9, 2026
8f94dd0
update
zhulinJulia24 Apr 9, 2026
f553c66
test
zhulinJulia24 Apr 9, 2026
d9a7859
update
zhulinJulia24 Apr 10, 2026
8e7690b
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 10, 2026
8cab8c7
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 13, 2026
63e19db
update
zhulinJulia24 Apr 13, 2026
546391c
update
zhulinJulia24 Apr 15, 2026
2801a3d
update
zhulinJulia24 Apr 15, 2026
1c9397f
Merge branch 'InternLM:main' into async_result_auto
zhulinJulia24 Apr 15, 2026
db3bfe5
update models list
zhulinJulia24 Apr 15, 2026
c91682b
update
zhulinJulia24 Apr 17, 2026
1abfeb8
merge main
zhulinJulia24 Apr 17, 2026
263f783
update
zhulinJulia24 Apr 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/api_eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ env:
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
COV_PARAM: --cov /opt/py3/lib/python3.12/site-packages/lmdeploy
TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache
Expand All @@ -58,7 +58,7 @@ jobs:
if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -132,7 +132,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand Down
34 changes: 29 additions & 5 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ on:
description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
type: boolean
default: false
# Tag of the openmmlab/lmdeploy image used for the job containers.
# The jobs read this as `inputs.docker_tag || 'latest-cu12.8'`, so the fallback
# in each `container.image` should be kept in sync with `default` here.
docker_tag:
required: true
description: 'Docker tag'
type: string
default: 'latest-cu12.8'
# Names the generated result file (<REPORT_DIR>/<result_tag>.txt) and is passed
# as --infer-version. The upload step is gated on the tag differing from
# 'default', so the description states that rule instead of "not none".
result_tag:
  required: true
  description: "Tag for the benchmark result file; results are uploaded to feishu only when the tag is not 'default'"
  type: string
  default: "default"

env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
Expand All @@ -45,7 +55,7 @@ jobs:
if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -93,7 +103,7 @@ jobs:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'latest-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -117,7 +127,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand Down Expand Up @@ -153,7 +163,7 @@ jobs:
TEST_ENV: ${{ matrix.transformers }}
timeout-minutes: 480
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'latest-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -197,11 +207,25 @@ jobs:
if: contains(fromJson(github.event.inputs.backend), 'pytorch') && !contains(fromJson(github.event.inputs.backend), 'turbomind')
run: |
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and pytorch' --alluredir=${{env.ALLURE_REPORT_DIR}}
# Collect benchmark output under REPORT_DIR into <result_tag>.txt. Runs even if
# the benchmark steps failed (if: always()).
- name: Generate result
  if: always()
  env:
    # `inputs` is empty on events other than workflow_dispatch/workflow_call, so
    # fall back to the declared default (same pattern as `inputs.docker_tag || ...`).
    # Passing the value through env keeps user input out of the script text.
    RESULT_TAG: ${{ inputs.result_tag || 'default' }}
  run: |
    cd /nvme/qa_test_models/feishu_upload
    python3 test_benchmark.py --root "${{ env.REPORT_DIR }}" --output "${{ env.REPORT_DIR }}/${RESULT_TAG}.txt" --hardware A100 --infer-version "${RESULT_TAG}"
# Upload the generated result file via main.py. Skipped when the tag is
# 'default', which now also covers runs where no tag was supplied at all.
- name: Async result
  if: always() && (inputs.result_tag || 'default') != 'default'
  env:
    RESULT_TAG: ${{ inputs.result_tag || 'default' }}
    FEISHU_APP_ID: ${{secrets.FEISHU_APP_ID}}
    FEISHU_APP_SECRET: ${{secrets.FEISHU_APP_SECRET}}
    FEISHU_TABLE_TOKEN: ${{secrets.FEISHU_TABLE_TOKEN}}
    FEISHU_TABLE_ID: ${{secrets.BENCHMARK_FEISHU_TABLE_ID}}
  run: |
    cd /nvme/qa_test_models/feishu_upload
    python3 main.py --skip-duplicates "${{ env.REPORT_DIR }}/${RESULT_TAG}.txt" --config config-benchmark.py
- name: Clear workfile
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down
77 changes: 54 additions & 23 deletions .github/workflows/daily_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ on:
description: 'regression functions'
type: string
default: "['quant', 'tools','restful','pipeline','benchmark','evaluation']"
# Tag of the openmmlab/lmdeploy image used for the job containers.
# This workflow also runs on `schedule`, where `inputs` is empty; the jobs
# therefore read `inputs.docker_tag || 'nightly-test-cu12.8'`, and that fallback
# should be kept in sync with `default` here.
docker_tag:
required: true
description: 'Docker tag'
type: string
default: 'nightly-test-cu12.8'
schedule:
- cron: '00 14 * * 0-4'

Expand All @@ -48,7 +53,7 @@ env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
ROOT_DIR: /nvme/qa_test_models
REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
COV_PARAM: --cov /opt/py3/lib/python3.12/site-packages/lmdeploy
TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
Expand All @@ -60,7 +65,7 @@ jobs:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -109,7 +114,7 @@ jobs:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -131,7 +136,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand All @@ -140,9 +145,13 @@ jobs:
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
- name: Mark as start
run: |
chmod -R 777 ${{env.TEST_CODE_PATH}}
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
# Runs regardless of earlier step results (if: always()) and recursively opens
# permissions on the copied test code and the report directory.
# NOTE(review): presumably so later jobs and non-root host users can write to
# and clean up these paths — confirm.
- name: Clear workfile
if: always()
run: |
chmod -R 777 ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.REPORT_DIR}}

test_quantization:
needs: download_pkgs
Expand All @@ -158,7 +167,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: ${{ matrix.transformers }}
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand All @@ -177,7 +186,7 @@ jobs:
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install auto_gptq matplotlib attrdict
python3 -m pip install matplotlib attrdict
python3 -m pip install -r requirements/lite.txt
- name: Install lmdeploy
run: |
Expand Down Expand Up @@ -210,7 +219,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down Expand Up @@ -246,7 +254,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: ${{ matrix.transformers }}
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -330,7 +338,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -347,6 +354,36 @@ jobs:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model_path: ['Qwen/Qwen3-8B-Base', 'Qwen/Qwen3-30B-A3B', 'Qwen/Qwen3-32B', 'OpenGVLab/InternVL3_5-30B-A3B', 'OpenGVLab/InternVL3-38B', 'Qwen/Qwen3-VL-8B-Instruct', 'Qwen/Qwen3-VL-30B-A3B-Instruct']
include:
- tp: 2
model: Qwen3.5-35B-A3B
model_path: Qwen/Qwen3.5-35B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: all
extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
backend: pytorch
- tp: 2
model: Qwen3.5-35B-A3B
model_path: Qwen/Qwen3.5-35B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
backend: turbomind
- tp: 2
model: Qwen3.5-27B
model_path: Qwen/Qwen3.5-27B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
- tp: 2
model: Qwen3.5-35B-A3B-Base
model_path: Qwen/Qwen3.5-35B-A3B-Base
case_info: ['completions_v1']
generate_type: base
- tp: 1
model: Qwen3.5-2B-Base
model_path: Qwen/Qwen3.5-2B-Base
case_info: ['completions_v1']
generate_type: base
- tp: 2
model: Qwen3-8B-Base
model_path: Qwen/Qwen3-8B-Base
Expand Down Expand Up @@ -422,7 +459,7 @@ jobs:
extra: '--logprobs-mode raw_logprobs'
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -527,7 +564,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -540,7 +576,7 @@ jobs:
needs: test_quantization
timeout-minutes: 240
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -590,7 +626,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -604,7 +639,7 @@ jobs:
needs: test_quantization
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -646,7 +681,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -671,7 +705,7 @@ jobs:
generate_type: base
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -777,7 +811,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -790,7 +823,7 @@ jobs:
needs: test_quantization
timeout-minutes: 240
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -841,7 +874,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -854,7 +886,7 @@ jobs:
needs: [test_tools, test_restful, test_pipeline, test_benchmark]
timeout-minutes: 5
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand All @@ -866,7 +898,6 @@ jobs:
run: cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Get coverage report
Expand All @@ -879,7 +910,7 @@ jobs:
- name: Clear workfile
if: always()
run: |
chmod -R 777 ${{env.ROOT_DIR}}
chmod -R 777 ${{env.REPORT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down
Loading
Loading