@@ -178,7 +178,7 @@ void invokeFP4Quantization(int b, int m, int n, T const* input, float const* SFS
178178 config.stream = stream;
179179 cudaLaunchAttribute attrs[1 ];
180180 attrs[0 ].id = cudaLaunchAttributeProgrammaticStreamSerialization;
181- attrs[0 ].val .programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL () ;
181+ attrs[0 ].val .programmaticStreamSerializationAllowed = false ;
182182 config.numAttrs = 1 ;
183183 config.attrs = attrs;
184184 cudaLaunchKernelEx (&config, kernel_instance, b, m, n, n, input, SFScale, reinterpret_cast <uint32_t *>(output),
@@ -213,7 +213,7 @@ void invokeMxFP8Quantization(int b, int m, int n, int padded_n, T const* input,
213213 config.stream = stream;
214214 cudaLaunchAttribute attrs[1 ];
215215 attrs[0 ].id = cudaLaunchAttributeProgrammaticStreamSerialization;
216- attrs[0 ].val .programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL () ;
216+ attrs[0 ].val .programmaticStreamSerializationAllowed = false ;
217217 config.numAttrs = 1 ;
218218 config.attrs = attrs;
219219 cudaLaunchKernelEx (&config,
@@ -388,7 +388,7 @@ void computePerTokenGlobalScaleForFP4Quantization(int b, int m, int n, T const*
388388 config.stream = stream;
389389 cudaLaunchAttribute attrs[1 ];
390390 attrs[0 ].id = cudaLaunchAttributeProgrammaticStreamSerialization;
391- attrs[0 ].val .programmaticStreamSerializationAllowed = tensorrt_llm::common::getEnvEnablePDL () ;
391+ attrs[0 ].val .programmaticStreamSerializationAllowed = false ;
392392 config.numAttrs = 1 ;
393393 config.attrs = attrs;
394394 TLLM_CUDA_CHECK (cudaLaunchKernelEx (
0 commit comments