Skip to content

Commit 0163e1d

Browse files
committed
[TRTLLM-9752][fix] Disable PDL (Programmatic Dependent Launch) for quantization kernels
Signed-off-by: Bo Deng <[email protected]>
1 parent 16fd781 commit 0163e1d

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

cpp/tensorrt_llm/kernels/quantization.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,7 @@ void invokeFP4Quantization(int b, int m, int n, T const* input, float const* SFS
178178
config.stream = stream;
179179
cudaLaunchAttribute attrs[1];
180180
attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
181-
attrs[0].val.programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL();
181+
attrs[0].val.programmaticStreamSerializationAllowed = false;
182182
config.numAttrs = 1;
183183
config.attrs = attrs;
184184
cudaLaunchKernelEx(&config, kernel_instance, b, m, n, n, input, SFScale, reinterpret_cast<uint32_t*>(output),
@@ -213,7 +213,7 @@ void invokeMxFP8Quantization(int b, int m, int n, int padded_n, T const* input,
213213
config.stream = stream;
214214
cudaLaunchAttribute attrs[1];
215215
attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
216-
attrs[0].val.programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL();
216+
attrs[0].val.programmaticStreamSerializationAllowed = false;
217217
config.numAttrs = 1;
218218
config.attrs = attrs;
219219
cudaLaunchKernelEx(&config,
@@ -388,7 +388,7 @@ void computePerTokenGlobalScaleForFP4Quantization(int b, int m, int n, T const*
388388
config.stream = stream;
389389
cudaLaunchAttribute attrs[1];
390390
attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
391-
attrs[0].val.programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL();
391+
attrs[0].val.programmaticStreamSerializationAllowed = false;
392392
config.numAttrs = 1;
393393
config.attrs = attrs;
394394
TLLM_CUDA_CHECK(cudaLaunchKernelEx(

0 commit comments

Comments
 (0)