summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/diff.meta.slang130
-rw-r--r--source/slang/slang-emit-c-like.cpp5
-rw-r--r--source/slang/slang-emit-c-like.h1
-rw-r--r--source/slang/slang-emit-torch.cpp95
-rw-r--r--source/slang/slang-emit-torch.h2
-rw-r--r--source/slang/slang-intrinsic-expand.cpp27
-rw-r--r--source/slang/slang-ir-check-differentiability.cpp2
-rw-r--r--source/slang/slang-ir-inst-defs.h3
-rw-r--r--source/slang/slang-ir-insts.h2
-rw-r--r--source/slang/slang-ir-pytorch-cpp-binding.cpp4
-rw-r--r--source/slang/slang-ir.cpp5
-rw-r--r--source/slang/slang-stdlib.cpp3
-rw-r--r--source/slang/slang.cpp1
13 files changed, 215 insertions, 65 deletions
diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang
index 51cf1cdb7..252b6f5e9 100644
--- a/source/slang/diff.meta.slang
+++ b/source/slang/diff.meta.slang
@@ -34,6 +34,15 @@ struct TensorView
[__readNone]
Ptr<T> data_ptr();
+ __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)")
+ [__readNone]
+ Ptr<T> data_ptr_at(uint index);
+
+ __generic<let N: int>
+ __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)")
+ [__readNone]
+ Ptr<T> data_ptr_at(vector<uint, N> index);
+
__implicit_conversion($(kConversionCost_ImplicitDereference))
__intrinsic_op($(kIROp_TorchTensorGetView))
__init(TorchTensor<T> t);
@@ -65,6 +74,13 @@ struct TensorView
__target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)")
void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val);
+ __target_intrinsic(cuda, "atomicAdd($0.data_ptr_at<$TR>($1), $2)")
+ T InterlockedAdd(uint index, T val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicAdd($0.data_ptr_at<$TR>($1), $2)")
+ T InterlockedAdd(vector<uint, N> index, T val);
+
__target_intrinsic(cuda, "$0.dimensionCount")
[__readNone]
uint dims();
@@ -81,44 +97,139 @@ struct TensorView
{
[ForceInline] [__readNone] get { return load(index); }
[ForceInline] set { store(index, newValue); }
+
+ __target_intrinsic(cuda, "$0.load<$G0>($1)")
+ ref;
}
__subscript(uint i1, uint i2) -> T
{
[ForceInline] [__readNone] get { return load(i1, i2); }
[ForceInline] set { store(i1, i2, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1, $2)")
+ ref;
}
__subscript(uint2 i) -> T
{
[ForceInline] [__readNone] get { return load(i.x, i.y); }
[ForceInline] set { store(i.x, i.y, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y)")
+ ref;
}
__subscript(uint i1, uint i2, uint i3) -> T
{
[ForceInline] [__readNone] get { return load(i1, i2, i3); }
[ForceInline] set { store(i1, i2, i3, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)")
+ ref;
}
__subscript(uint3 i) -> T
{
[ForceInline] [__readNone] get { return load(i.x, i.y, i.z); }
[ForceInline] set { store(i.x, i.y, i.z, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z)")
+ ref;
}
__subscript(uint i1, uint i2, uint i3, uint i4) -> T
{
[ForceInline] [__readNone] get { return load(i1, i2, i3, i4); }
[ForceInline] set { store(i1, i2, i3, i4, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)")
+ ref;
}
__subscript(uint4 i) -> T
{
[__readNone][ForceInline] get { return load(i.x, i.y, i.z, i.w); }
[ForceInline] set { store(i.x, i.y, i.z, i.w, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z, $1.w)")
+ ref;
}
__subscript(uint i1, uint i2, uint i3, uint i4, uint i5) -> T
{
[ForceInline] [__readNone] get { return load(i1, i2, i3, i4, i5); }
[ForceInline] set { store(i1, i2, i3, i4, i5, newValue); }
+ __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)")
+ ref;
}
}
+${{{{
+for (auto atomicIntegerTypeName : kCudaAtomicIntegerTypes)
+{
+}}}}
+extension TensorView<$(atomicIntegerTypeName)>
+{
+ typealias __Element = $(atomicIntegerTypeName);
+ __target_intrinsic(cuda, "atomicInc($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedIncrement(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicInc($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedIncrement(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicMin($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedMin(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicMin($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedMin(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicMax($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedMax<T>(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicMax($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedMax(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicAnd($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedAnd<T>(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicAnd($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedAnd(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicOr($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedOr<T>(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicOr($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedOr(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicXor($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedXor<T>(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicXor($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedXor(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedExchange(uint index, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$TR>($1), $2)")
+ __Element InterlockedExchange(vector<uint, N> index, __Element val);
+
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$TR>($1), $2, $3)")
+ __Element InterlockedCompareExchange(uint index, __Element compare, __Element val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$TR>($1), $2, $3)")
+ __Element InterlockedCompareExchange(vector<uint, N> index, __Element compare, __Element val);
+}
+
+${{{{
+} // end for atomicIntegerTypeName
+}}}}
+
+extension TensorView<float>
+{
+ __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$G0>($1), $2)")
+ float InterlockedExchange(uint index, float val);
+
+ __generic<let N:int>
+ __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$G0>($1), $2)")
+ float InterlockedExchange(vector<uint, N> index, float val);
+}
+
__generic<T>
__intrinsic_type($(kIROp_TorchTensorType))
struct TorchTensor
@@ -162,9 +273,26 @@ struct TorchTensor
static TorchTensor<T> alloc(uint i0, uint i1, uint i2, uint i3, uint i4);
__intrinsic_op($(kIROp_AllocateTorchTensor))
- static TorchTensor<T> zerosLike(TorchTensor<T> other);
+ static TorchTensor<T> emptyLike(TorchTensor<T> other);
+
+ __target_intrinsic(cpp, "$0.zero_()")
+ void fillZero();
+
+ __target_intrinsic(cpp, "$0.fill_($1)")
+ void fillValue(T val);
+
+ static TorchTensor<T> zerosLike(TorchTensor<T> other)
+ {
+ var result = emptyLike(other);
+ result.fillZero();
+ return result;
+ }
+
}
+__target_intrinsic(cpp, "AT_CUDA_CHECK(cudaStreamSynchronize(at::cuda::getCurrentCUDAStream()))")
+void syncTorchCudaStream();
+
__generic<T: IDifferentiable>
__intrinsic_op($(kIROp_MakeDifferentialPairUserCode))
DifferentialPair<T> diffPair(T primal, T.Differential diff);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 08ba050db..166c131d5 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2405,6 +2405,11 @@ void CLikeSourceEmitter::_emitInst(IRInst* inst)
m_writer->emit(";\n");
break;
+ // Insts that needs to be emitted as code blocks.
+ case kIROp_CudaKernelLaunch:
+ emitInstStmtImpl(inst);
+ break;
+
case kIROp_LiveRangeStart:
case kIROp_LiveRangeEnd:
emitLiveness(inst);
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 8046fa633..bfe3dcc4e 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -530,6 +530,7 @@ public:
virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) { SLANG_UNUSED(varDecl); SLANG_UNUSED(varType); return false; }
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) { SLANG_UNUSED(inst); SLANG_UNUSED(inOuterPrec); return false; }
+ virtual void emitInstStmtImpl(IRInst* inst) { SLANG_UNUSED(inst); }
virtual void emitPostKeywordTypeAttributesImpl(IRInst* inst) { SLANG_UNUSED(inst); }
diff --git a/source/slang/slang-emit-torch.cpp b/source/slang/slang-emit-torch.cpp
index 276164ed5..c198b011d 100644
--- a/source/slang/slang-emit-torch.cpp
+++ b/source/slang/slang-emit-torch.cpp
@@ -65,6 +65,49 @@ void emitTorchScalarTypeName(SourceWriter* m_writer, IRInst* type)
}
}
+void TorchCppSourceEmitter::emitInstStmtImpl(IRInst* inst)
+{
+ switch (inst->getOp())
+ {
+ default:
+ return;
+ case kIROp_CudaKernelLaunch:
+ {
+ m_writer->emit("AT_CUDA_CHECK(cudaLaunchKernel(");
+ // func
+ m_writer->emit("(const void*)(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit("), ");
+
+ // gridDim
+ m_writer->emit("slang_bit_cast<dim3>(");
+ emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ m_writer->emit("), ");
+
+ // blockDim
+ m_writer->emit("slang_bit_cast<dim3>(");
+ emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
+ m_writer->emit("), ");
+
+ // args
+ emitOperand(inst->getOperand(3), getInfo(EmitOp::General));
+ m_writer->emit(", ");
+
+ // shared mem
+ m_writer->emit("slangGetCudaKernelSharedMemSize((const void*)(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit(")), ");
+
+ // stream
+ m_writer->emit("((cudaStream_t)");
+ emitOperand(inst->getOperand(4), getInfo(EmitOp::General));
+ m_writer->emit(")));\n");
+
+ break;
+ }
+ }
+}
+
bool TorchCppSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
{
switch (inst->getOp())
@@ -78,47 +121,12 @@ bool TorchCppSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo&
m_writer->emit("make_tensor_view(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitStringLiteral(getUnmangledName(inst->getOperand(1)));
+ emitStringLiteral(getUnmangledName(inst->getOperand(0)));
m_writer->emit(", ");
- emitTorchScalarTypeName(m_writer, inst->getOperand(1)->getDataType());
+ emitTorchScalarTypeName(m_writer, inst->getOperand(0)->getDataType());
m_writer->emit(")");
return true;
}
- case kIROp_CudaKernelLaunch:
- {
- m_writer->emit("cudaLaunchKernel(");
- // func
- m_writer->emit("(const void*)(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit("), ");
-
- // gridDim
- m_writer->emit("slang_bit_cast<dim3>(");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit("), ");
-
- // blockDim
- m_writer->emit("slang_bit_cast<dim3>(");
- emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
- m_writer->emit("), ");
-
- // args
- emitOperand(inst->getOperand(3), getInfo(EmitOp::General));
- m_writer->emit(", ");
-
- // shared mem
- m_writer->emit("slangGetCudaKernelSharedMemSize((const void*)(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(")), ");
-
- // stream
- m_writer->emit("((cudaStream_t)");
- emitOperand(inst->getOperand(4), getInfo(EmitOp::General));
- m_writer->emit("))");
- return true;
- }
case kIROp_TorchGetCudaStream:
{
m_writer->emit("at::cuda::getCurrentCUDAStream()");
@@ -131,12 +139,14 @@ bool TorchCppSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo&
/*
Emit something like:
```
- torch::Tensor out = torch::zeros_like(other);
+ torch::Tensor out = torch::empty_like(other);
```
*/
- m_writer->emit("torch::zeros_like(");
+ m_writer->emit("torch::empty_like(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(")");
+ m_writer->emit(", torch::TensorOptions().device(torch::kCUDA).dtype(");
+ emitTorchScalarTypeName(m_writer, inst->getDataType());
+ m_writer->emit("))");
}
else
{
@@ -180,11 +190,6 @@ SlangResult TorchCppSourceEmitter::calcTypeName(IRType* type, CodeGenTarget targ
out << "torch::Tensor";
return SLANG_OK;
}
- case kIROp_TorchKernelMemoryAllocatorType:
- {
- out << "CudaTaskMemoryAllocator";
- return SLANG_OK;
- }
}
}
diff --git a/source/slang/slang-emit-torch.h b/source/slang/slang-emit-torch.h
index 84ce42331..aeb9058a4 100644
--- a/source/slang/slang-emit-torch.h
+++ b/source/slang/slang-emit-torch.h
@@ -19,6 +19,8 @@ public:
protected:
// CPPSourceEmitter overrides
+ virtual void emitInstStmtImpl(IRInst* inst) override;
+
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) override;
virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) override;
virtual void emitModuleImpl(IRModule* module, DiagnosticSink* sink) override;
diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp
index de3396efb..7a4744d59 100644
--- a/source/slang/slang-intrinsic-expand.cpp
+++ b/source/slang/slang-intrinsic-expand.cpp
@@ -268,17 +268,28 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
break;
case 'T':
- // Get the the 'element' type for the type of the param at the index
+ // Get the 'element' or `return` type for the type of the param at the index
{
- SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9');
- Index argIndex = (*cursor++) - '0' + m_argIndexOffset;
- SLANG_RELEASE_ASSERT(m_argCount > argIndex);
-
- IRType* type = m_args[argIndex].get()->getDataType();
- if (auto baseTextureType = as<IRTextureType>(type))
+ IRType* type = nullptr;
+ if (*cursor == 'R')
{
- type = baseTextureType->getElementType();
+ // Get the return type of the call
+ cursor++;
+ type = m_callInst->getDataType();
+ }
+ else
+ {
+ SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9');
+ Index argIndex = (*cursor++) - '0' + m_argIndexOffset;
+ SLANG_RELEASE_ASSERT(m_argCount > argIndex);
+
+ type = m_args[argIndex].get()->getDataType();
+ if (auto baseTextureType = as<IRTextureType>(type))
+ {
+ type = baseTextureType->getElementType();
+ }
}
+ SLANG_RELEASE_ASSERT(type);
m_emitter->emitType(type);
}
break;
diff --git a/source/slang/slang-ir-check-differentiability.cpp b/source/slang/slang-ir-check-differentiability.cpp
index c4b09d9e8..355381559 100644
--- a/source/slang/slang-ir-check-differentiability.cpp
+++ b/source/slang/slang-ir-check-differentiability.cpp
@@ -332,8 +332,6 @@ public:
{
if (expectDiffInstWorkListSet.Add(inst))
{
- if (inst->getFullType() && inst->getFullType()->getOp() == kIROp_IntType)
- printf("break");
expectDiffInstWorkList.add(inst);
}
}
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index e58094b15..8d06c6970 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -69,7 +69,6 @@ INST(Nop, nop, 0, 0)
INST(TensorViewType, TensorView, 1, HOISTABLE)
INST(TorchTensorType, TorchTensor, 0, HOISTABLE)
- INST(TorchKernelMemoryAllocatorType, TorchMemAllocatorType, 0, HOISTABLE)
INST(ArrayListType, ArrayListVector, 1, HOISTABLE)
/* BindExistentialsTypeBase */
@@ -614,7 +613,7 @@ INST(GetOptiXSbtDataPtr, getOptiXSbtDataPointer, 0, 0)
INST(MakeArrayList, makeArrayList, 0, 0)
INST(MakeTensorView, makeTensorView, 0, 0)
-INST(AllocateTorchTensor, allocTorchTensor , 0, 0)
+INST(AllocateTorchTensor, allocTorchTensor, 0, 0)
INST(TorchGetCudaStream, TorchGetCudaStream, 0, 0)
INST(TorchTensorGetView, TorchTensorGetView, 0, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index f5b03eb45..26d00d4df 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -3129,7 +3129,7 @@ public:
return emitMakeStruct(type, args.getCount(), args.getBuffer());
}
- IRInst* emitMakeTensorView(IRType* type, IRInst* allocator, IRInst* val);
+ IRInst* emitMakeTensorView(IRType* type, IRInst* val);
IRInst* emitMakeExistential(
IRType* type,
diff --git a/source/slang/slang-ir-pytorch-cpp-binding.cpp b/source/slang/slang-ir-pytorch-cpp-binding.cpp
index 971e87a6f..d59d57474 100644
--- a/source/slang/slang-ir-pytorch-cpp-binding.cpp
+++ b/source/slang/slang-ir-pytorch-cpp-binding.cpp
@@ -263,8 +263,6 @@ static void generateCppBindingForFunc(IRFunc* func, DiagnosticSink* sink)
oldParam->removeAndDeallocate();
}
- auto allocator = builder.emitVar(builder.getType(kIROp_TorchKernelMemoryAllocatorType));
-
for (auto block : func->getBlocks())
{
for (auto inst : block->getChildren())
@@ -297,7 +295,7 @@ static void generateCppBindingForFunc(IRFunc* func, DiagnosticSink* sink)
else if (auto getView = as<IRTorchTensorGetView>(inst))
{
builder.setInsertBefore(getView);
- auto makeView = builder.emitMakeTensorView(getView->getFullType(), allocator, inst->getOperand(0));
+ auto makeView = builder.emitMakeTensorView(getView->getFullType(), inst->getOperand(0));
getView->replaceUsesWith(makeView);
instsToRemove.add(getView);
}
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index d03096483..6ca05d2d6 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -3951,10 +3951,9 @@ namespace Slang
return emitIntrinsicInst(type, kIROp_MakeStruct, argCount, args);
}
- IRInst* IRBuilder::emitMakeTensorView(IRType* type, IRInst* allocator, IRInst* val)
+ IRInst* IRBuilder::emitMakeTensorView(IRType* type, IRInst* val)
{
- IRInst* args[2] = { allocator, val };
- return emitIntrinsicInst(type, kIROp_MakeTensorView, 2, args);
+ return emitIntrinsicInst(type, kIROp_MakeTensorView, 1, &val);
}
IRInst* IRBuilder::emitMakeExistential(
diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp
index 17fb3d28b..9d4a079c8 100644
--- a/source/slang/slang-stdlib.cpp
+++ b/source/slang/slang-stdlib.cpp
@@ -244,6 +244,9 @@ namespace Slang
{kIROp_Leq, "leq", "<=", "__BuiltinArithmeticType", ARITHMETIC_MASK | BOOL_RESULT},
};
+ // Integer types that can be used in atomic operations in CUDA.
+ static const char* kCudaAtomicIntegerTypes[] = { "int", "uint", "uint64_t", "int64_t" };
+
// Both the following functions use these macros.
// NOTE! They require a variable named path to emit the #line correctly if in source file.
#define SLANG_RAW(TEXT) sb << TEXT;
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index 79f9e56ba..b831859a5 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -1493,6 +1493,7 @@ CapabilitySet TargetRequest::getTargetCaps()
break;
case CodeGenTarget::CPPSource:
+ case CodeGenTarget::PyTorchCppBinding:
case CodeGenTarget::HostExecutable:
case CodeGenTarget::ShaderSharedLibrary:
case CodeGenTarget::HostHostCallable: