diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-04-30 16:51:25 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-30 13:51:25 -0700 |
| commit | 1a4a51301d084dd1c8c5906eb810eb6caf6f3963 (patch) | |
| tree | 3eac138d918853f88bb8e2b5f14ed36a57e54d7a | |
| parent | c45f368ae404798db67a601749c6e0047fba75ef (diff) | |
Preliminary CUDA half maths (#1827)
* #include an absolute path didn't work - because paths were taken to always be relative.
* Split out StringEscapeUtil.
* Added StringEscapeUtil.
* Fix typo in unix quoting type.
* Small comment improvements.
* Try to fix linux linking issue.
* Fix typo.
* Attempt to fix linux link issue.
* Update VS proj even though nothing really changed.
* Fix another typo issue.
* Fix for windows issue.
Fixed bug.
* Make separate Utils for escaping.
* Fix typo.
* Split out into StringEscapeHandler.
* Windows shell does handle removing quotes (so remove code to remove them).
* Handle unescaping if not initiating using the shell.
* Slight improvement around shell like decoding.
* Simplify command extraction.
* Add shared-library category type.
* Fix bug in command extraction.
* Typo in transcendental category.
* Enable unit-test on in smoke test category.
* Make parsing failing output as a failing test.
* Fixes for transcendental tests. Disable tests that do not work.
* Changed category parsing.
* Removed the TestResult parameter from _gatherTestsForFile.
Made testsList only output.
* Remove testing if all tests were disabled.
* Make args of CommandLine always unescaped.
* Add category.
* Don't need escaping on unix/linux.
* Remove some no longer used functions.
* Add requireSMVersion to CUDAExtensionTracker.
* half-calc.slang now works for CUDA.
* bit-cast-16-bit works on CUDA.
* WIP handling of CUDA vector<half> types.
* Half swizzle CUDA.
* Half vector test.
* Fix swizzle half bug.
* Fix compilation issue with narrowing to Index.
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 48 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 214 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.h | 16 | ||||
| -rw-r--r-- | tests/compute/half-calc.slang | 2 | ||||
| -rw-r--r-- | tests/compute/half-vector-calc.slang | 35 | ||||
| -rw-r--r-- | tests/compute/half-vector-calc.slang.expected.txt | 5 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang | 1 |
8 files changed, 274 insertions, 49 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index c6de56641..61702824c 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -5,7 +5,7 @@ // are passed down. #ifdef SLANG_CUDA_ENABLE_HALF -#include <cuda_fp16.h> +# include <cuda_fp16.h> #endif #ifdef SLANG_CUDA_ENABLE_OPTIX @@ -65,8 +65,50 @@ struct __half4 { __half2 xy; __half2 zw; }; // Mechanism to make half vectors SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 make___half2(__half x, __half y) { return __halves2half2(x, y); } -SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 make___half3(__half x, __half y, __half z) { __half3 o; o.xy = __halves2half2(x, y); o.z = z; return o; } -SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 make___half4(__half x, __half y, __half z, __half w) { __half4 o; o.xy = __halves2half2(x, y); o.zw = __halves2half2(z, w); return o; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 make___half3(__half x, __half y, __half z) { return __half3{ __halves2half2(x, y), z }; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 make___half4(__half x, __half y, __half z, __half w) { return __half4{ __halves2half2(x, y), __halves2half2(z, w)}; } + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 constructFromScalar___half2(half x) { return __half2half2(x); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 constructFromScalar___half3(half x) { return __half3{__half2half2(x), x}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 constructFromScalar___half4(half x) { const __half2 v = __half2half2(x); return __half4{v, v}; } + +// Half3 maths ops + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator+(const __half3 &lh, const __half3 &rh) { return __half3{__hadd2(lh.xy, rh.xy), __hadd(lh.z, rh.z)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator-(const __half3 &lh, const __half3 &rh) { return __half3{__hsub2(lh.xy, rh.xy), __hsub(lh.z, rh.z)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator*(const __half3 &lh, const __half3 &rh) { return __half3{__hmul2(lh.xy, rh.xy), __hmul(lh.z, rh.z)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator/(const __half3 &lh, const __half3 &rh) { return __half3{__h2div(lh.xy, rh.xy), __hdiv(lh.z, rh.z)}; } + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 operator-(const __half3 &h) { return __half3{__hneg2(h.xy), __hneg(h.z)}; } + +#if 0 +// We need to return the vector<bool> type +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator==(const __half3 &lh, const __half3 &rh) { return __hbeq2(lh.xy, rh.xy) && __heq(lh.z, rh.z); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator!=(const __half3 &lh, const __half3 &rh) { return __hbneu2(lh.xy, rh.xy) && __hneu(lh.z, rh.z); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>(const __half3 &lh, const __half3 &rh) { return __hbgt2(lh.xy, rh.xy) && __hgt(lh.z, rh.z); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<(const __half3 &lh, const __half3 &rh) { return __hblt2(lh.xy, rh.xy) && __hlt(lh.z, rh.z); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>=(const __half3 &lh, const __half3 &rh) { return __hbge2(lh.xy, rh.xy) && __hge(lh.z, rh.z); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<=(const __half3 &lh, const __half3 &rh) { return __hble2(lh.xy, rh.xy) && __hle(lh.z, rh.z); } +#endif + +// Half4 maths ops + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator+(const __half4 &lh, const __half4 &rh) { return __half4{__hadd2(lh.xy, rh.xy), __hadd2(lh.zw, rh.zw)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator-(const __half4 &lh, const __half4 &rh) { return __half4{__hsub2(lh.xy, rh.xy), __hsub2(lh.zw, rh.zw)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator*(const __half4 &lh, const __half4 &rh) { return __half4{__hmul2(lh.xy, rh.xy), __hmul2(lh.zw, rh.zw)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator/(const __half4 &lh, const __half4 &rh) { return __half4{__h2div(lh.xy, rh.xy), __h2div(lh.zw, rh.zw)}; } + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator-(const __half4 &h) { return __half4{__hneg2(h.xy), __hneg2(h.zw)}; } + +#if 0 +// We need to return vector<bool> type +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator==(const __half4 &lh, const __half4 &rh) { return __hbeq2(lh.xy, rh.xy) && __hbeq2(lh.zw, rh.zw); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator!=(const __half4 &lh, const __half4 &rh) { return __hbneu2(lh.xy, rh.xy) && __hbneu2(lh.zw, rh.zw); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>(const __half4 &lh, const __half4 &rh) { return __hbgt2(lh.xy, rh.xy) && __hbgt2(lh.zw, rh.zw); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<(const __half4 &lh, const __half4 &rh) { return __hblt2(lh.xy, rh.xy) && __hblt2(lh.zw, rh.zw); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator>=(const __half4 &lh, const __half4 &rh) { return __hbge2(lh.xy, rh.xy) && __hbge2(lh.zw, rh.zw); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool operator<=(const __half4 &lh, const __half4 &rh) { return __hble2(lh.xy, rh.xy) && __hble2(lh.zw, rh.zw); } +#endif // Use the round nearest as the default - it is the only one defined SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __float22half2(const float2 a) { return __float22half2_rn(a); } diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 1d416634a..736250219 100755 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -1413,6 +1413,8 @@ SlangResult dissassembleDXILUsingDXC( // Look for the version if (auto cudaTracker = as<CUDAExtensionTracker>(source.extensionTracker)) { + cudaTracker->finalize(); + if (cudaTracker->m_smVersion.isSet()) { DownstreamCompiler::CapabilityVersion version; diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index a259ea933..5f7eada68 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -10,6 +10,21 @@ namespace Slang { + + +void CUDAExtensionTracker::finalize() +{ + if (isBaseTypeRequired(BaseType::Half)) + { + // The cuda_fp16.hpp header indicates the need is for version 5.3, but when this is tried + // NVRTC says it cannot load builtins. + // The lowest version that this does work for is 6.0, so that's what we use here. + + // https://docs.nvidia.com/cuda/nvrtc/index.html#group__options + requireSMVersion(SemanticVersion(6, 0)); + } +} + static bool _isSingleNameBasicType(IROp op) { switch (op) @@ -152,17 +167,74 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT return Super::calcScalarFuncName(op, type, outBuilder); } -SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) +void CUDASourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp) { - SLANG_UNUSED(target); + typedef HLSLIntrinsic::Op Op; + + if (auto vecType = as <IRVectorType>(specOp->returnType)) + { + if (auto baseType = as<IRBasicType>(vecType->getElementType())) + { + if (baseType->getBaseType() == BaseType::Half) + { + switch (specOp->op) + { + case Op::Init: + case Op::Add: + case Op::Mul: + case Op::Div: + + case Op::Neg: + + case Op::ConstructFromScalar: + + case Op::Leq: + case Op::Less: + case Op::Greater: + case Op::Geq: + case Op::Neq: + case Op::Eql: + { + return; + } + } + } + } + } - if (target == CodeGenTarget::CSource) + switch (specOp->op) { - return Super::calcTypeName(type, target, out); + case Op::Init: + { + // Special case handling + auto returnType = specOp->returnType; + + if (auto vecType = as <IRVectorType>(returnType)) + { + if (auto baseType = as<IRBasicType>(vecType->getElementType())) + { + if (baseType->getBaseType() == BaseType::Half) + { + // Defined already in cuda-prelude.h + return; + } + } + } + + break; + } + default: break; } - // We allow C source, because if we need a name - SLANG_ASSERT(target == CodeGenTarget::CUDASource); + Super::emitSpecializedOperationDefinition(specOp); +} + +SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) +{ + SLANG_UNUSED(target); + + // The names CUDA produces are all compatible with 'C' (ie they aren't templated types) + SLANG_ASSERT(target == CodeGenTarget::CUDASource || target == CodeGenTarget::CSource); switch (type->getOp()) { @@ -180,30 +252,6 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, out << prefix << vecCount; return SLANG_OK; } - -#if 0 - case kIROp_MatrixType: - { - auto matType = static_cast<IRMatrixType*>(type); - - auto elementType = matType->getElementType(); - const auto rowCount = int(getIntVal(matType->getRowCount())); - const auto colCount = int(getIntVal(matType->getColumnCount())); - - out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; - return SLANG_OK; - } - case kIROp_UnsizedArrayType: - { - auto arrayType = static_cast<IRUnsizedArrayType*>(type); - auto elementType = arrayType->getElementType(); - - out << "Array<"; - SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); - out << ">"; - return SLANG_OK; - } -#endif default: { if (isNominalOp(type->getOp())) @@ -519,10 +567,102 @@ void CUDASourceEmitter::_emitInitializerList(IRType* elementType, IRUse* operand m_writer->emit("\n}"); } +void CUDASourceEmitter::_emitGetHalfVectorElement(IRInst* base, Index index, Index vecSize, const EmitOpInfo& inOuterPrec) +{ + SLANG_ASSERT(index < vecSize); + + EmitOpInfo outerPrec = inOuterPrec; + + auto prec = getInfo(EmitOp::Postfix); + const bool needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(base, leftSide(outerPrec, prec)); + + m_writer->emit("."); + + switch (vecSize) + { + default: + { + char const* kComponents[] = { "x", "y", "z", "w" }; + m_writer->emit(kComponents[index]); + break; + } + case 3: + { + char const* kComponents[] = { "xy.x", "xy.y", "z"}; + m_writer->emit(kComponents[index]); + break; + } + case 4: + { + char const* kComponents[] = { "xy.x", "xy.y", "zw.x", "zw.y" }; + m_writer->emit(kComponents[index]); + break; + } + } + + maybeCloseParens(needClose); +} + bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) { switch(inst->getOp()) { + case kIROp_swizzle: + { + // We need to special case for half types. + auto swizzleInst = static_cast<IRSwizzle*>(inst); + + IRInst* baseInst = swizzleInst->getBase(); + IRType* baseType = baseInst->getDataType(); + + // If we are swizzling from a built in type, + if (as<IRBasicType>(baseType)) + { + // Just use the default behavior + } + else if (auto vecType = as<IRVectorType>(baseType)) + { + if (auto basicType = as<IRBasicType>(vecType->getElementType())) + { + if (basicType->getBaseType() == BaseType::Half) + { + const Index vecElementCount = Index(getIntVal(vecType->getElementCount())); + + const Index elementCount = Index(swizzleInst->getElementCount()); + if (elementCount == 1) + { + const Index index = Index(getIntVal(swizzleInst->getElementIndex(0))); + _emitGetHalfVectorElement(baseInst, index, vecElementCount, inOuterPrec); + } + else + { + auto outerPrec = getInfo(EmitOp::General); + + m_writer->emit("make___half"); + m_writer->emitInt64(elementCount); + m_writer->emit("("); + + for (Index i = 0; i < elementCount; ++i) + { + if (i) + { + m_writer->emit(", "); + } + + const Index index = Index(getIntVal(swizzleInst->getElementIndex(i))); + _emitGetHalfVectorElement(baseInst, index, vecElementCount, outerPrec); + } + + m_writer->emit(")"); + } + return true; + } + } + } + break; + } case kIROp_Construct: { // Simple constructor call @@ -558,7 +698,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } case kIROp_WaveMaskBallot: { - _requireCUDASMVersion(SemanticVersion(7, 0)); + m_extensionTracker->requireSMVersion(SemanticVersion(7, 0)); m_writer->emit("__ballot_sync("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); @@ -569,7 +709,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } case kIROp_WaveMaskMatch: { - _requireCUDASMVersion(SemanticVersion(7, 0)); + m_extensionTracker->requireSMVersion(SemanticVersion(7, 0)); m_writer->emit("__match_any_sync("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); @@ -584,14 +724,6 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu return Super::tryEmitInstExprImpl(inst, inOuterPrec); } -void CUDASourceEmitter::_requireCUDASMVersion(SemanticVersion const& version) -{ - if (version > m_extensionTracker->m_smVersion) - { - m_extensionTracker->m_smVersion = version; - } -} - void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) { // Does this function declare any requirements on CUDA capabilities @@ -603,7 +735,7 @@ void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) { SemanticVersion version; version.setFromInteger(SemanticVersion::IntegerType(smDecoration->getCUDASMVersion())); - _requireCUDASMVersion(version); + m_extensionTracker->requireSMVersion(version); } } } diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index a5d227c6b..b73948525 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -18,7 +18,14 @@ public: void requireBaseType(BaseType baseType) { m_baseTypeFlags |= _getFlag(baseType); } bool isBaseTypeRequired(BaseType baseType) { return (m_baseTypeFlags & _getFlag(baseType)) != 0; } + /// Ensure that the generated code is compiled for at least CUDA SM `version` + void requireSMVersion(const SemanticVersion& smVersion) { m_smVersion = (smVersion > m_smVersion) ? smVersion : m_smVersion; } + + /// Should be called before reading out values. + void finalize(); + protected: + static BaseTypeFlags _getFlag(BaseType baseType) { return BaseTypeFlags(1) << int(baseType); } BaseTypeFlags m_baseTypeFlags = 0; @@ -86,15 +93,16 @@ protected: // CPPSourceEmitter overrides virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) SLANG_OVERRIDE; virtual SlangResult calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) SLANG_OVERRIDE; - + + virtual void emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp) SLANG_OVERRIDE; + SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); void _emitInitializerList(IRType* elementType, IRUse* operands, Index operandCount); void _emitInitializerListValue(IRType* elementType, IRInst* value); - /// Ensure that the generated code is compiled for at least CUDA SM `version` - void _requireCUDASMVersion(SemanticVersion const& version); - + void _emitGetHalfVectorElement(IRInst* baseInst, Index index, Index vecSize, const EmitOpInfo& inOuterPrec); + RefPtr<CUDAExtensionTracker> m_extensionTracker; }; diff --git a/tests/compute/half-calc.slang b/tests/compute/half-calc.slang index 57efebe53..e0dd01315 100644 --- a/tests/compute/half-calc.slang +++ b/tests/compute/half-calc.slang @@ -1,6 +1,6 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE:-dx12 -compute -use-dxil -profile cs_6_2 -render-features half -shaderobj //TEST(compute):COMPARE_COMPUTE:-vk -compute -profile cs_6_2 -render-features half -shaderobj - +//TEST(compute):COMPARE_COMPUTE:-cuda -compute -render-features half -shaderobj // Test for doing a calculation using half diff --git a/tests/compute/half-vector-calc.slang b/tests/compute/half-vector-calc.slang new file mode 100644 index 000000000..5594c38fd --- /dev/null +++ b/tests/compute/half-vector-calc.slang @@ -0,0 +1,35 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE:-dx12 -compute -output-using-type -use-dxil -profile cs_6_2 -render-features half -shaderobj +//TEST(compute):COMPARE_COMPUTE:-vk -compute -output-using-type -profile cs_6_2 -render-features half -shaderobj +//TEST(compute):COMPARE_COMPUTE:-cuda -compute -output-using-type -render-features half -shaderobj + +// Test for doing a calculation using half + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<float> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; + int x = tid.x; + + half2 v3 = half2(float(x)); + + half2 v0 = half2(x * 2.0f, x * 0.5f); + half3 v1 = half3(x * 2.0f, x * 0.5f, x - 1.0f); + half4 v2 = half4(x + 1, x - 1, x + 2 , x - 2); + + v1 += v0.yxy; + v1 += v2.wzy; + v2 += v0.xyxy; + + v0 = v0 + v0 * v0; + v1 = v1 + v1 * v1; + v2 = v2 + v2 * v2; + + half o2 = v2.x + v2.y + v2.z + v2.w; + half o1 = v1.x + v1.y + v1.z; + half o0 = v0.x + v0.y; + + outputBuffer[tid] = o0 + o1 + o2 + v3.y; +} diff --git a/tests/compute/half-vector-calc.slang.expected.txt b/tests/compute/half-vector-calc.slang.expected.txt new file mode 100644 index 000000000..64beb1dd1 --- /dev/null +++ b/tests/compute/half-vector-calc.slang.expected.txt @@ -0,0 +1,5 @@ +type: float +20.000000 +98.500000 +292.000000 +600.500000 diff --git a/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang index 0241ff9cd..28f8973f6 100644 --- a/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang +++ b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang @@ -2,6 +2,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_2 -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj //TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], stride=4):name inputBuffer RWStructuredBuffer<int> inputBuffer; |
