summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-04-23 11:32:07 -0400
committerGitHub <noreply@github.com>2021-04-23 11:32:07 -0400
commit79e722338cd59aab74b4c57600c5ac6bce3bcd25 (patch)
treeaf77066235c4038bd15c6297ef4f48d3e562171d /source/slang
parenta47e7751c2738543e872452debc7494369c9fb35 (diff)
Preliminary CUDA Half support (#1808)
* #include an absolute path didn't work - because paths were taken to always be relative. * WIP CUDA half support. * Working support for half on CUDA - requires cuda_fp16.h and associated files can be found. * Fix for win32 for unused funcs. * Fix for Clang. * Hack to disable unused local function warning.
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/hlsl.meta.slang46
-rwxr-xr-xsource/slang/slang-compiler.cpp5
-rw-r--r--source/slang/slang-emit-cuda.cpp24
-rw-r--r--source/slang/slang-emit-cuda.h14
4 files changed, 73 insertions, 16 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 761016866..754b3ac63 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1066,6 +1066,7 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x)
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))")
+__target_intrinsic(cuda, "__half_as_ushort")
uint16_t asuint16(float16_t value);
vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value)
@@ -1078,6 +1079,7 @@ matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> va
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)")
+__target_intrinsic(cuda, "__ushort_as_half")
float16_t asfloat16(uint16_t value);
vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value)
@@ -1088,11 +1090,16 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v
// Float<->signed cases:
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); }
+__target_intrinsic(hlsl)
+__target_intrinsic(cuda, "__half_as_short")
+[__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); }
__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); }
__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); }
+__target_intrinsic(hlsl)
+__target_intrinsic(cuda, "__short_as_half")
+[__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); }
+
__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); }
__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); }
@@ -1593,6 +1600,8 @@ vector<float, N> f16tof32(vector<uint, N> value)
VECTOR_MAP_UNARY(float, N, f16tof32, value);
}
+
+
// Convert to 16-bit float stored in low bits of integer
__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
__glsl_version(420)
@@ -1606,6 +1615,39 @@ vector<uint, N> f32tof16(vector<float, N> value)
VECTOR_MAP_UNARY(uint, N, f32tof16, value);
}
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// The following is Slang specific and NOT part of standard HLSL
+// It's not clear what happens with float16 time in HLSL -> can the float16 coerce to uint for example? If so that would
+// give the wrong result
+
+__target_intrinsic(glsl, "unpackHalf2x16($0).x")
+__target_intrinsic(cuda, "__half2float")
+__glsl_version(420)
+float f16tof32(float16_t value);
+
+__generic<let N : int>
+__target_intrinsic(hlsl)
+__target_intrinsic(cuda, "__half2float")
+vector<float, N> f16tof32(vector<float16_t, N> value)
+{
+ VECTOR_MAP_UNARY(float, N, f16tof32, value);
+}
+
+// Convert to float16_t
+__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
+__glsl_version(420)
+__target_intrinsic(cuda, "__float2half")
+float16_t f32tof16_(float value);
+
+__generic<let N : int>
+__target_intrinsic(cuda, "__float2half")
+vector<float16_t, N> f32tof16_(vector<float, N> value)
+{
+ VECTOR_MAP_UNARY(uint, N, f32tof16, value);
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
// Flip surface normal to face forward, if needed
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 19a5fddf8..1d416634a 100755
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -1421,6 +1421,11 @@ SlangResult dissassembleDXILUsingDXC(
options.requiredCapabilityVersions.add(version);
}
+
+ if (cudaTracker->isBaseTypeRequired(BaseType::Half))
+ {
+ options.flags |= CompileOptions::Flag::EnableFloat16;
+ }
}
options.sourceContents = source.source;
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 2f5a9917d..a259ea933 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -27,7 +27,7 @@ static bool _isSingleNameBasicType(IROp op)
}
}
-/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
+UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
{
switch (op)
{
@@ -44,8 +44,11 @@ static bool _isSingleNameBasicType(IROp op)
case kIROp_UIntType: return UnownedStringSlice("uint");
case kIROp_UInt64Type: return UnownedStringSlice("ulonglong");
- // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
- case kIROp_HalfType: return UnownedStringSlice("half");
+ case kIROp_HalfType:
+ {
+ m_extensionTracker->requireBaseType(BaseType::Half);
+ return UnownedStringSlice("__half");
+ }
case kIROp_FloatType: return UnownedStringSlice("float");
case kIROp_DoubleType: return UnownedStringSlice("double");
@@ -54,7 +57,7 @@ static bool _isSingleNameBasicType(IROp op)
}
-/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op)
+UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op)
{
switch (op)
{
@@ -70,8 +73,11 @@ static bool _isSingleNameBasicType(IROp op)
case kIROp_UIntType: return UnownedStringSlice("uint");
case kIROp_UInt64Type: return UnownedStringSlice("ulonglong");
- // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
- case kIROp_HalfType: return UnownedStringSlice("half");
+ case kIROp_HalfType:
+ {
+ m_extensionTracker->requireBaseType(BaseType::Half);
+ return UnownedStringSlice("__half");
+ }
case kIROp_FloatType: return UnownedStringSlice("float");
case kIROp_DoubleType: return UnownedStringSlice("double");
@@ -160,12 +166,6 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target,
switch (type->getOp())
{
- case kIROp_HalfType:
- {
- // Special case half
- out << getBuiltinTypeName(kIROp_FloatType);
- return SLANG_OK;
- }
case kIROp_VectorType:
{
auto vecType = static_cast<IRVectorType*>(type);
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index fefa40a11..a5d227c6b 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -11,7 +11,17 @@ class CUDAExtensionTracker : public RefObject
{
public:
+ typedef uint32_t BaseTypeFlags;
+
SemanticVersion m_smVersion;
+
+ void requireBaseType(BaseType baseType) { m_baseTypeFlags |= _getFlag(baseType); }
+ bool isBaseTypeRequired(BaseType baseType) { return (m_baseTypeFlags & _getFlag(baseType)) != 0; }
+
+protected:
+ static BaseTypeFlags _getFlag(BaseType baseType) { return BaseTypeFlags(1) << int(baseType); }
+
+ BaseTypeFlags m_baseTypeFlags = 0;
};
class CUDASourceEmitter : public CPPSourceEmitter
@@ -30,8 +40,8 @@ public:
};
};
- static UnownedStringSlice getBuiltinTypeName(IROp op);
- static UnownedStringSlice getVectorPrefix(IROp op);
+ UnownedStringSlice getBuiltinTypeName(IROp op);
+ UnownedStringSlice getVectorPrefix(IROp op);
virtual RefObject* getExtensionTracker() SLANG_OVERRIDE { return m_extensionTracker; }
virtual void emitTempModifiers(IRInst* temp) SLANG_OVERRIDE;