diff options
| author | Mukund Keshava <mkeshava@nvidia.com> | 2025-06-10 10:18:24 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-10 04:48:24 +0000 |
| commit | d70da65a90ccd73439895a43b3958c0ea1441f35 (patch) | |
| tree | e6f0c1cd8413e3e213a29bf233b5fc3a3fdf2eaf | |
| parent | ab6b5f28d332f201fd96b7e05070116684d02899 (diff) | |
Add optix support for coopvec (#7286)
* WiP: Add coopvec support for Optix
* format code
* fix minor issues
* Fix review comments
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
| -rw-r--r-- | docs/command-line-slangc-reference.md | 1 | ||||
| -rw-r--r-- | docs/user-guide/a3-02-reference-capability-atoms.md | 7 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 82 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 6 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 1 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 12 | ||||
| -rw-r--r-- | tests/cooperative-vector/exp2.slang | 27 | ||||
| -rw-r--r-- | tests/cooperative-vector/log2.slang | 26 | ||||
| -rw-r--r-- | tests/cuda/optix-coopvec.slang | 137 |
10 files changed, 295 insertions, 7 deletions
diff --git a/docs/command-line-slangc-reference.md b/docs/command-line-slangc-reference.md index b3cd2576a..7d18799ba 100644 --- a/docs/command-line-slangc-reference.md +++ b/docs/command-line-slangc-reference.md @@ -1112,6 +1112,7 @@ A capability describes an optional feature that a target may or may not support. * `hlsl_nvapi` * `hlsl_2018` * `hlsl_coopvec_poc` +* `optix_coopvec` * `vertex` * `fragment` * `compute` diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 56809055b..9740806ea 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -153,10 +153,10 @@ Versions > Represents HLSL NVAPI support. `hlsl_2018` -> Represet HLSL compatibility support. +> Represent HLSL compatibility support. `hlsl_coopvec_poc` -> Represet compatibility support for the deprecated POC DXC +> Represent compatibility support for the deprecated POC DXC `dxil_lib` > Represents capabilities required for DXIL Library compilation. @@ -1322,6 +1322,9 @@ Other ---------------------- *Capabilities which may be deprecated* +`optix_coopvec` +> Represents capabilities required for optix cooperative vector support. 
+ `SPIRV_1_0` > Use `spirv_1_0` instead diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e00108e96..8b0bade6e 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -24382,6 +24382,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] __init<U : __BuiltinArithmeticType>(CoopVec<U, N> other) { this.copyFrom(other); @@ -24421,6 +24422,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] void copyFrom<U : __BuiltinArithmeticType>(CoopVec<U,N> other) { __target_switch @@ -24429,6 +24431,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti __intrinsic_asm "$0 = $1"; case hlsl_coopvec_poc: __intrinsic_asm ".CopyFrom"; + case optix_coopvec: + __intrinsic_asm "optixCoopVecCvt<$TR>(*($0));"; default: if (__isFloat<T>() && __isInt<U>()) this = __int_to_float_cast<T>(other); @@ -24438,7 +24442,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti this = __real_cast<T>(other); else if (__isInt<T>() && __isInt<U>()) this = __int_cast<T>(other); - } + } } /// Fill all elements of this CoopVec with the specified value. 
@@ -24591,6 +24595,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [__NoSideEffect] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] static CoopVec<T, N> load(ByteAddressBuffer buffer, int32_t byteOffset16ByteAligned = 0) { __target_switch @@ -24606,6 +24611,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti CoopVec<T, N> ret; ret.__Load(buffer, byteOffset16ByteAligned); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecLoad<$TR>((CUdeviceptr)(&($0)));"; default: var vec = CoopVec<T, N>(); for(int i = 0; i < N; ++i) @@ -24618,6 +24625,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [__NoSideEffect] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] static CoopVec<T, N> load(RWByteAddressBuffer buffer, int32_t byteOffset16ByteAligned = 0) { __target_switch @@ -24633,6 +24641,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti CoopVec<T, N> ret; ret.__Load(buffer, byteOffset16ByteAligned); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecLoad<$TR>((CUdeviceptr)(&($0)));"; default: var vec = CoopVec<T, N>(); for(int i = 0; i < N; ++i) @@ -24702,6 +24712,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [__NoSideEffect] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] static CoopVec<T, N> load<let M : int>(__constref groupshared const T[M] data, int32_t byteOffset16ByteAligned = 0) { static_assert(N <= M, "The destination vector size is smaller than the input."); @@ -24716,6 +24727,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti CoopVec<T, N> ret; ret.__Load(data, __byteToElemOffset<T>(byteOffset16ByteAligned)); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecLoad<$TR>((CUdeviceptr)(&($0)));"; default: 
CoopVec<T,N> result; for(int i = 0; i < N; ++i) @@ -24922,6 +24935,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] This add(This other) { __target_switch @@ -24932,6 +24946,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti This ret = this; ret.__mutAdd(other); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecAdd($0, $1)"; default: return __pureAdd(other); } } @@ -24957,6 +24973,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] This sub(This other) { __target_switch @@ -24967,6 +24984,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti This ret = this; ret.__mutSub(other); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecSub($0, $1)"; default: return __pureSub(other); } } @@ -24992,6 +25011,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] + [require(optix_coopvec)] This mul(This other) { __target_switch @@ -25002,6 +25022,8 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti This ret = this; ret.__mutMul(other); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecMul($0, $1)"; default: return __pureMul(other); } } @@ -25621,6 +25643,7 @@ CoopVec<T, N> operator *(const T lhs, CoopVec<T, N> rhs) [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] CoopVec<T, N> min<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x, CoopVec<T, N> y) { __target_switch @@ -25636,6 +25659,8 @@ CoopVec<T, N> min<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x, CoopVec<T, N> ret = x; ret.__mutMin(y); 
return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecMin($0, $1)"; default: CoopVec<T, N> ret; for(int i = 0; i < N; ++i) @@ -25648,6 +25673,7 @@ CoopVec<T, N> min<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x, [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] CoopVec<T, N> max<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x, CoopVec<T, N> y) { __target_switch @@ -25663,6 +25689,8 @@ CoopVec<T, N> max<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x, CoopVec<T, N> ret = x; ret.__mutMax(y); return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecMax($0, $1)"; default: CoopVec<T, N> ret; for(int i = 0; i < N; ++i) @@ -25809,6 +25837,7 @@ CoopVec<T, N> clamp<T : __BuiltinIntegerType, let N : int>(CoopVec<T, N> x, Coop // [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] CoopVec<T, N> step<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> edge, CoopVec<T, N> x) { __target_switch @@ -25825,6 +25854,8 @@ CoopVec<T, N> step<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> ed { result:$$CoopVec<T, N> = OpExtInst glsl450 Step $edge $x; }; + case optix_coopvec: + __intrinsic_asm "optixCoopVecStep($0, $1)"; default: CoopVec<T, N> ret; for(int i = 0; i < N; ++i) @@ -25890,6 +25921,43 @@ CoopVec<T, N> log<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x) // [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] +CoopVec<T, N> log2<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x) +{ + __target_switch + { + default: + CoopVec<T, N> ret; + for(int i = 0; i < N; ++i) + ret[i] = log2(x[i]); + return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecLog2($0)"; + } +} + +// [ForceInline] +[require(cooperative_vector)] +[require(hlsl_coopvec_poc)] +[require(optix_coopvec)] +CoopVec<T, N> exp2<T : __BuiltinFloatingPointType, let N : 
int>(CoopVec<T, N> x) +{ + __target_switch + { + default: + CoopVec<T, N> ret; + for(int i = 0; i < N; ++i) + ret[i] = exp2(x[i]); + return ret; + case optix_coopvec: + __intrinsic_asm "optixCoopVecExp2($0)"; + } +} + +// [ForceInline] +[require(cooperative_vector)] +[require(hlsl_coopvec_poc)] +[require(optix_coopvec)] CoopVec<T, N> tanh<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x) { __target_switch @@ -25906,6 +25974,8 @@ CoopVec<T, N> tanh<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> x) { result:$$CoopVec<T, N> = OpExtInst glsl450 Tanh $x; }; + case optix_coopvec: + __intrinsic_asm "optixCoopVecTanh($0)"; default: CoopVec<T, N> ret; for(int i = 0; i < N; ++i) @@ -25944,6 +26014,7 @@ CoopVec<T, N> atan<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> yO // [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] CoopVec<T, N> fma<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> a, CoopVec<T, N> b, CoopVec<T, N> c) { // TODO: Investigate, why does this fail if it's not inlined @@ -25963,6 +26034,8 @@ CoopVec<T, N> fma<T : __BuiltinFloatingPointType, let N : int>(CoopVec<T, N> a, { result:$$CoopVec<T, N> = OpExtInst glsl450 Fma $a $b $c; }; + case optix_coopvec: + __intrinsic_asm "optixCoopVecFFMA($0, $1, $2)"; default: CoopVec<T, N> ret; for(int i = 0; i < N; ++i) @@ -26695,6 +26768,7 @@ CoopVec<T, M> coopVecMatMulAddPacked<T : __BuiltinArithmeticType, let M : int, l [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] __generic<T : __BuiltinArithmeticType, let M : int, let K : int, U : __BuiltinArithmeticType> CoopVec<T, M> coopVecMatMulAdd( CoopVec<U, K> input, @@ -26746,6 +26820,7 @@ if(buffer.isRW) /// @param matrixInterpretation Specifies how to interpret the values in the matrix. 
[require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] void coopVecOuterProductAccumulate<T : __BuiltinArithmeticType, let M : int, let N : int>( CoopVec<T, M> a, CoopVec<T, N> b, @@ -26773,6 +26848,8 @@ void coopVecOuterProductAccumulate<T : __BuiltinArithmeticType, let M : int, let OpCapability CooperativeVectorTrainingNV; OpCooperativeVectorOuterProductAccumulateNV $matrixPtr $matrixOffset $a $b $memoryLayoutSpirv $matrixInterpretationSpirv $matrixStride; }; + case optix_coopvec: + __intrinsic_asm "optixCoopVecOuterProductAccumulate($0, $1, (CUdeviceptr)(&$2), $3, $4)"; default: for (int i = 0; i < M; ++i) { @@ -26836,6 +26913,7 @@ void coopVecOuterProductAccumulate<T : __BuiltinArithmeticType, let M : int, let /// @param offset Byte offset into the buffer. [require(cooperative_vector)] [require(hlsl_coopvec_poc)] +[require(optix_coopvec)] void coopVecReduceSumAccumulate<T : __BuiltinArithmeticType, let N : int>( CoopVec<T, N> v, $(buffer.type) buffer, @@ -26855,6 +26933,8 @@ void coopVecReduceSumAccumulate<T : __BuiltinArithmeticType, let N : int>( OpCapability CooperativeVectorTrainingNV; OpCooperativeVectorReduceSumAccumulateNV $bufferPtr $offset $v; }; + case optix_coopvec: + __intrinsic_asm "optixCoopVecReduceSumAccumulate($0, (CUdeviceptr)(&$1), $2)"; default: for (int i = 0; i < N; ++i) { diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 7616cc201..343f89687 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -220,11 +220,11 @@ def _sm_6_9 : _sm_6_8; def hlsl_nvapi : hlsl; -/// Represet HLSL compatibility support. +/// Represent HLSL compatibility support. 
/// [Version] def hlsl_2018 : _sm_5_1; -/// Represet compatibility support for the deprecated POC DXC +/// Represent compatibility support for the deprecated POC DXC /// [Version] def hlsl_coopvec_poc : _sm_6_8; @@ -244,6 +244,8 @@ def _cuda_sm_6_0 : _cuda_sm_5_0; def _cuda_sm_7_0 : _cuda_sm_6_0; def _cuda_sm_8_0 : _cuda_sm_7_0; def _cuda_sm_9_0 : _cuda_sm_8_0; +/// Represents capabilities required for optix cooperative vector support. +def optix_coopvec : _cuda_sm_9_0; /// All code-gen targets /// [Compound] diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 0092d159a..3fbf47bfa 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -112,6 +112,7 @@ CLikeSourceEmitter::CLikeSourceEmitter(const Desc& desc) auto targetCaps = getTargetReq()->getTargetCaps(); isCoopvecPoc = targetCaps.implies(CapabilityAtom::hlsl_coopvec_poc); + isOptixCoopVec = targetCaps.implies(CapabilityAtom::optix_coopvec); } SlangResult CLikeSourceEmitter::init() diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 78793f655..1e9deaa0d 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -744,6 +744,9 @@ protected: // Indicates if we are emitting for DXC cooperative vector POC. bool isCoopvecPoc = false; + + // Indicates if we are emitting for Optix cooperative vector. 
+ bool isOptixCoopVec = false; }; } // namespace Slang diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 6f97a11da..8e95cebfb 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1152,8 +1152,16 @@ void CPPSourceEmitter::_emitType(IRType* type, DeclaratorInfo* declarator) auto arrayType = static_cast<IRArrayType*>(type); auto elementType = arrayType->getElementType(); int elementCount = int(getIntVal(arrayType->getElementCount())); - - m_writer->emit("FixedArray<"); + auto nameHint = arrayType->findDecoration<IRNameHintDecoration>(); + bool isCoopVec = nameHint && (nameHint->getName() == UnownedStringSlice("CoopVec")); + if (isCoopVec && isOptixCoopVec) + { + m_writer->emit("OptixCoopVec<"); + } + else + { + m_writer->emit("FixedArray<"); + } _emitType(elementType, nullptr); m_writer->emit(", "); m_writer->emit(elementCount); diff --git a/tests/cooperative-vector/exp2.slang b/tests/cooperative-vector/exp2.slang new file mode 100644 index 000000000..ddff55453 --- /dev/null +++ b/tests/cooperative-vector/exp2.slang @@ -0,0 +1,27 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -output-using-type -emit-spirv-directly +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -render-feature cooperative-vector -dx12-experimental -use-dxil -output-using-type -profile cs_6_8 -Xslang... -Xdxc -Vd -X. 
-capability hlsl_coopvec_poc +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -output-using-type + +// CHECK: type: float +// CHECK-NEXT: 2.000000 +// CHECK-NEXT: 4.000000 +// CHECK-NEXT: 8.000000 +// CHECK-NEXT: 16.000000 + + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +//TEST_INPUT:ubuffer(data=[1.0 2.0 3.0 4.0], stride=4),name=input +ByteAddressBuffer input; + +[numthreads(1, 1, 1)] +void computeMain() +{ + CoopVec<float, 4> vec = coopVecLoad<4, float>(input); + + CoopVec<float, 4> result = exp2(vec); + + for(int i = 0; i < result.getCount(); ++i) + outputBuffer[i] = result[i]; +} diff --git a/tests/cooperative-vector/log2.slang b/tests/cooperative-vector/log2.slang new file mode 100644 index 000000000..bacdf8fde --- /dev/null +++ b/tests/cooperative-vector/log2.slang @@ -0,0 +1,26 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -output-using-type -emit-spirv-directly +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -render-feature cooperative-vector -dx12-experimental -use-dxil -output-using-type -profile cs_6_8 -Xslang... -Xdxc -Vd -X. 
-capability hlsl_coopvec_poc +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -output-using-type + +// CHECK: type: float +// CHECK-NEXT: 0.000000 +// CHECK-NEXT: 1.000000 +// CHECK-NEXT: 1.584962 +// CHECK-NEXT: 2.000000 + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +//TEST_INPUT:ubuffer(data=[1.0 2.0 3.0 4.0], stride=4),name=input +ByteAddressBuffer input; + +[numthreads(1, 1, 1)] +void computeMain() +{ + CoopVec<float, 4> vec = coopVecLoad<4, float>(input); + + CoopVec<float, 4> result = log2(vec); + + for(int i = 0; i < result.getCount(); ++i) + outputBuffer[i] = result[i]; +} diff --git a/tests/cuda/optix-coopvec.slang b/tests/cuda/optix-coopvec.slang new file mode 100644 index 000000000..58e83ebb9 --- /dev/null +++ b/tests/cuda/optix-coopvec.slang @@ -0,0 +1,137 @@ +//TEST:SIMPLE(filecheck=CHECK): -target cuda -capability optix_coopvec + +// CHECK: optixCoopVecLoad +// CHECK: OptixCoopVec +// CHECK: optixCoopVecTanh +// CHECK: optixCoopVecAdd +// CHECK: optixCoopVecCvt +// CHECK: optixCoopVecFFMA +// CHECK: optixCoopVecMax +// CHECK: optixCoopVecMin +// CHECK: optixCoopVecMul +// CHECK: optixCoopVecOuterProductAccumulate +// CHECK: optixCoopVecReduceSumAccumulate +// CHECK: optixCoopVecStep +// CHECK: optixCoopVecSub +// CHECK: optixCoopVecLog2 +// CHECK: optixCoopVecExp2 + + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +//TEST_INPUT:ubuffer(data=[1.0 2.0 3.0 4.0], stride=4),name=input1 +ByteAddressBuffer input1; + +//TEST_INPUT:ubuffer(data=[1.0 2.0 3.0 4.0], stride=4),name=input2 +ByteAddressBuffer input2; + +//TEST_INPUT:ubuffer(data=[1.0 2.0 3.0 4.0], stride=4),name=input3 +ByteAddressBuffer input3; + +//TEST_INPUT: set inputBuffer = ubuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12], stride=4); +uniform int32_t* inputBuffer; + +//TEST_INPUT:ubuffer(data=[67305985 134678021 202050057 269422093], stride=4),name=matrix 
+//[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16] +ByteAddressBuffer matrix; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4),name=outputMat +RWByteAddressBuffer outputMat; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4),name=outputMat2 +RWByteAddressBuffer outputMat2; + +//TEST_INPUT:ubuffer(data=[5 6 7 8], stride=4),name=bias +ByteAddressBuffer bias; + +struct RayPayload +{ + float4 color; + float2x4 lssData; + bool isSphere; + bool isLss; +}; + + +[numthreads(1, 1, 1)] +[shader("closesthit")] +void closestHitShader(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + CoopVec<float, 4> vec1 = coopVecLoad<4, float>(input1); + CoopVec<float, 4> vec2 = coopVecLoad<4, float>(input2); + CoopVec<float, 4> vec3 = coopVecLoad<4, float>(input3); + + CoopVec<float, 4> resultTan = tanh(vec1); + + let resultAdd = vec1 + vec2; + + CoopVec<float, 4> resultCopy = coopVecLoad<4, float>(input1); + resultCopy.copyFrom<float>(vec2); + + CoopVec<float, 4> resultFMA = fma(vec1, vec2, vec3); + + CoopVec<float, 4> vec = coopVecLoad<4, float>(input1); + let resultMul = coopVecMatMulAdd<float, 4, 4>( + vec, + CoopVecComponentType::Float32, + matrix, + 0, + CoopVecComponentType::Float32, + bias, + 0, + CoopVecComponentType::SignedInt32, + CoopVecMatrixLayout::RowMajor, + false, + 4 + ); + + CoopVec<float, 4> resultMax = max(vec1, vec2); + CoopVec<float, 4> resultMin = min(vec1, vec2); + + CoopVec<float, 4> resultVecMul = vec1 * vec2; + + outputMat.Store<float>(0, float(1)); + coopVecOuterProductAccumulate( + vec1, + vec2, + outputMat, + 0, + 32, + CoopVecMatrixLayout::RowMajor, + CoopVecComponentType::Float32, + ); + + outputMat2.Store(0, float(1)); + coopVecReduceSumAccumulate( + vec1, + outputMat2, + 0, + ); + + CoopVec<float, 4> resultStep = step(vec1, vec2); + + CoopVec<float, 4> resultSub = vec1 - vec2; + + CoopVec<float, 4> resultLog2 = log2(vec1); + + CoopVec<float, 4> resultExp2 = exp2(vec1); + + for(int i = 0; i < 
resultTan.getCount(); ++i) + { + outputBuffer[i] = resultTan[i] + + resultAdd[i] + + resultCopy[i] + + resultFMA[i] + + resultMul[i] + + resultMax[i] + + resultMin[i] + + resultVecMul[i] + + outputMat.Load<float>(i) + + outputMat2.Load<float>(i) + + resultStep[i] + + resultSub[i] + + resultLog2[i] + + resultExp2[i]; + } +} |
