Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1532,11 +1532,12 @@ class HLSLBufferCopyEmitter {
bool emitCopy(QualType CType) {
LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);

// TODO: We should be able to fall back to a regular memcpy if the layout
// type doesn't have any padding, but that runs into issues in the backend
// currently.
//
// See https://github.com/llvm/wg-hlsl/issues/351
// If the layout type matches the original type, we can just fall back to a
// regular memcpy.
llvm::Type *OrigTy = CGF.CGM.getTypes().ConvertTypeForMem(CType);
if (LayoutTy == OrigTy)
return false;

emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
llvm::ConstantInt::get(CGF.SizeTy, 0));
return true;
Expand Down
10 changes: 2 additions & 8 deletions clang/test/CodeGenHLSL/ArrayAssignable.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,11 @@ void arr_assign8() {
C = c1;
}

// TODO: We should be able to just memcpy here.
// See https://github.com/llvm/wg-hlsl/issues/351
// Since everything is aligned on 16-byte boundaries, we just get a memcpy.
//
// CHECK-LABEL: define hidden void {{.*}}arr_assign9
// CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16
// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0
// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16
// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16
// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1
// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16
// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16
// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false)
// CHECK-NEXT: ret void
void arr_assign9() {
int4 C[2];
Expand Down