diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-04-23 11:32:07 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-23 11:32:07 -0400 |
| commit | 79e722338cd59aab74b4c57600c5ac6bce3bcd25 (patch) | |
| tree | af77066235c4038bd15c6297ef4f48d3e562171d /source/slang | |
| parent | a47e7751c2738543e872452debc7494369c9fb35 (diff) | |
Preliminary CUDA Half support (#1808)
* #include an absolute path didn't work - because paths were taken to always be relative.
* WIP CUDA half support.
* Working support for half on CUDA - requires cuda_fp16.h and associated files can be found.
* Fix for win32 for unused funcs.
* Fix for Clang.
* Hack to disable unused local function warning.
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 46 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.cpp | 5 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 24 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.h | 14 |
4 files changed, 73 insertions, 16 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 761016866..754b3ac63 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1066,6 +1066,7 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x) __target_intrinsic(hlsl) __target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))") +__target_intrinsic(cuda, "__half_as_ushort") uint16_t asuint16(float16_t value); vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value) @@ -1078,6 +1079,7 @@ matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> va __target_intrinsic(hlsl) __target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)") +__target_intrinsic(cuda, "__ushort_as_half") float16_t asfloat16(uint16_t value); vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value) @@ -1088,11 +1090,16 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v // Float<->signed cases: -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); } +__target_intrinsic(hlsl) +__target_intrinsic(cuda, "__half_as_short") +[__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); } __target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); } __target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) +__target_intrinsic(cuda, "__short_as_half") +[__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } + __target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); } __target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); } @@ -1593,6 +1600,8 @@ vector<float, N> f16tof32(vector<uint, N> value) VECTOR_MAP_UNARY(float, N, f16tof32, value); } + + // Convert to 16-bit float stored in low bits of integer __target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))") __glsl_version(420) @@ -1606,6 +1615,39 @@ vector<uint, N> f32tof16(vector<float, N> value) VECTOR_MAP_UNARY(uint, N, f32tof16, value); } +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// The following is Slang specific and NOT part of standard HLSL +// It's not clear what happens with float16 time in HLSL -> can the float16 coerce to uint for example? If so that would +// give the wrong result + +__target_intrinsic(glsl, "unpackHalf2x16($0).x") +__target_intrinsic(cuda, "__half2float") +__glsl_version(420) +float f16tof32(float16_t value); + +__generic<let N : int> +__target_intrinsic(hlsl) +__target_intrinsic(cuda, "__half2float") +vector<float, N> f16tof32(vector<float16_t, N> value) +{ + VECTOR_MAP_UNARY(float, N, f16tof32, value); +} + +// Convert to float16_t +__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))") +__glsl_version(420) +__target_intrinsic(cuda, "__float2half") +float16_t f32tof16_(float value); + +__generic<let N : int> +__target_intrinsic(cuda, "__float2half") +vector<float16_t, N> f32tof16_(vector<float, N> value) +{ + VECTOR_MAP_UNARY(uint, N, f32tof16, value); +} + +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + // Flip surface normal to face forward, if needed __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 19a5fddf8..1d416634a 100755 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -1421,6 +1421,11 @@ SlangResult dissassembleDXILUsingDXC( options.requiredCapabilityVersions.add(version); } + + if (cudaTracker->isBaseTypeRequired(BaseType::Half)) + { + options.flags |= CompileOptions::Flag::EnableFloat16; + } } options.sourceContents = source.source; diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 2f5a9917d..a259ea933 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -27,7 +27,7 @@ static bool _isSingleNameBasicType(IROp op) } } -/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op) +UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op) { switch (op) { @@ -44,8 +44,11 @@ static bool _isSingleNameBasicType(IROp op) case kIROp_UIntType: return UnownedStringSlice("uint"); case kIROp_UInt64Type: return UnownedStringSlice("ulonglong"); - // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting - case kIROp_HalfType: return UnownedStringSlice("half"); + case kIROp_HalfType: + { + m_extensionTracker->requireBaseType(BaseType::Half); + return UnownedStringSlice("__half"); + } case kIROp_FloatType: return UnownedStringSlice("float"); case kIROp_DoubleType: return UnownedStringSlice("double"); @@ -54,7 +57,7 @@ static bool _isSingleNameBasicType(IROp op) } -/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op) +UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op) { switch (op) { @@ -70,8 +73,11 @@ static bool _isSingleNameBasicType(IROp op) case kIROp_UIntType: return UnownedStringSlice("uint"); case kIROp_UInt64Type: return UnownedStringSlice("ulonglong"); - // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting - case kIROp_HalfType: return UnownedStringSlice("half"); + case kIROp_HalfType: + { + m_extensionTracker->requireBaseType(BaseType::Half); + return UnownedStringSlice("__half"); + } case kIROp_FloatType: return UnownedStringSlice("float"); case kIROp_DoubleType: return UnownedStringSlice("double"); @@ -160,12 +166,6 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, switch (type->getOp()) { - case kIROp_HalfType: - { - // Special case half - out << getBuiltinTypeName(kIROp_FloatType); - return SLANG_OK; - } case kIROp_VectorType: { auto vecType = static_cast<IRVectorType*>(type); diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index fefa40a11..a5d227c6b 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -11,7 +11,17 @@ class CUDAExtensionTracker : public RefObject { public: + typedef uint32_t BaseTypeFlags; + SemanticVersion m_smVersion; + + void requireBaseType(BaseType baseType) { m_baseTypeFlags |= _getFlag(baseType); } + bool isBaseTypeRequired(BaseType baseType) { return (m_baseTypeFlags & _getFlag(baseType)) != 0; } + +protected: + static BaseTypeFlags _getFlag(BaseType baseType) { return BaseTypeFlags(1) << int(baseType); } + + BaseTypeFlags m_baseTypeFlags = 0; }; class CUDASourceEmitter : public CPPSourceEmitter @@ -30,8 +40,8 @@ public: }; }; - static UnownedStringSlice getBuiltinTypeName(IROp op); - static UnownedStringSlice getVectorPrefix(IROp op); + UnownedStringSlice getBuiltinTypeName(IROp op); + UnownedStringSlice getVectorPrefix(IROp op); virtual RefObject* getExtensionTracker() SLANG_OVERRIDE { return m_extensionTracker; } virtual void emitTempModifiers(IRInst* temp) SLANG_OVERRIDE; |
