diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-05-15 10:52:55 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-15 10:52:55 -0400 |
| commit | 1027225ac7ec8da0e471b633f358333c8a95b010 (patch) | |
| tree | 39575cd03fed47dffb56d7ca0ec7eff3385d1407 /source/slang/slang-intrinsic-expand.cpp | |
| parent | 1856b8ad85266ed66985b42bd2321a35f8573a00 (diff) | |
Support for HW format conversions for RWTexture on CUDA (#1840)
* #include an absolute path didn't work - because paths were taken to always be relative.
* Fix for writing to RWTexture with half types on CUDA.
* CUDA half functionality doc updates.
* First pass support for sust.p RWTexture format conversion on write.
* Tidy up implementation of $C.
Made clamping mode #define able.
* A simple test for RWTexture CUDA format conversion.
Diffstat (limited to 'source/slang/slang-intrinsic-expand.cpp')
| -rw-r--r-- | source/slang/slang-intrinsic-expand.cpp | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index c6cf65e5a..c1e886621 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -8,6 +8,7 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const m_args = args; m_argCount = argCount; m_text = intrinsicText; + m_callInst = inst; const auto returnType = inst->getDataType(); @@ -59,6 +60,93 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const } } +static BaseType _getBaseTypeFromScalarType(SlangScalarType type) +{ + switch (type) + { + case SLANG_SCALAR_TYPE_INT32: return BaseType::Int; + case SLANG_SCALAR_TYPE_UINT32: return BaseType::UInt; + case SLANG_SCALAR_TYPE_INT16: return BaseType::Int16; + case SLANG_SCALAR_TYPE_UINT16: return BaseType::UInt16; + case SLANG_SCALAR_TYPE_INT64: return BaseType::Int64; + case SLANG_SCALAR_TYPE_UINT64: return BaseType::UInt64; + case SLANG_SCALAR_TYPE_INT8: return BaseType::Int8; + case SLANG_SCALAR_TYPE_UINT8: return BaseType::UInt8; + case SLANG_SCALAR_TYPE_FLOAT16: return BaseType::Half; + case SLANG_SCALAR_TYPE_FLOAT32: return BaseType::Float; + case SLANG_SCALAR_TYPE_FLOAT64: return BaseType::Double; + case SLANG_SCALAR_TYPE_BOOL: return BaseType::Bool; + default: return BaseType::Void; + } +} + +// TODO(JS): There is an inherent problem here: +// +// TimF: The big gotcha you'd have with trying to look up the IRVar or whatever from an intrinsic is that it is very easy for the user to "smuggle" a resource-type value through an intermediate function: +// +// ``` +// Imagine this is user code... +// void f(RWTexture2D t) { t.YourOpThatYouAdded(...); }[attributeYouCareAbout(...)] +// RWTexture2D gTex; +// ... +// f(gTex); +// +// ``` +// +// So when emitting IR code for f, there is no way to trace t back to gTex and get at[attributeYouCareAbout(...)] +// Structurally, you can get back to the IRParam for t and that's it. +// And even if there was some magic way to trace back through the call site, you would run into the problem that some call sites +// might call f(gTex) and other might call f(gSomeOtherTex) and there is no guarantee the attributes on those two textures would match. +// +// The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more. +// But for CUDA and C++ that's not the case or generally desirable. + +IRFormatDecoration* _findImageFormatDecoration(IRInst* inst) +{ + // JS(TODO): + // There could perhaps be other situations, that need to be covered + + // If this is a load, we need to get the decoration from the field key + if (IRLoad* load = as<IRLoad>(inst)) + { + if (IRFieldAddress* fieldAddress = as<IRFieldAddress>(load->getOperand(0))) + { + IRInst* field = fieldAddress->getField(); + return field->findDecoration<IRFormatDecoration>(); + } + } + // Otherwise just try on the instruction + return inst->findDecoration<IRFormatDecoration>(); +} + +bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType) +{ + int numElems = 1; + + if (auto vecType = as<IRVectorType>(dataType)) + { + numElems = int(getIntVal(vecType->getElementCount())); + dataType = vecType->getElementType(); + } + + BaseType baseType = BaseType::Void; + if (auto basicType = as<IRBasicType>(dataType)) + { + baseType = basicType->getBaseType(); + } + + const auto& imageFormatInfo = getImageFormatInfo(imageFormat); + const BaseType formatBaseType = _getBaseTypeFromScalarType(imageFormatInfo.scalarType); + + if (numElems != imageFormatInfo.channelCount) + { + SLANG_ASSERT(!"Format doesn't match channel count"); + return false; + } + + return formatBaseType == baseType; +} + const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) { const char*const end = m_text.end(); @@ -168,6 +256,32 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } break; + case 'C': + { + // The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format + // conversion required between the type associated by the resource and the backing ImageFormat. + // Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture + // writes that will do a format conversion. + if (m_emitter->getTarget() == CodeGenTarget::CUDASource) + { + IRInst* arg0 = m_callInst->getArg(0); + + if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0)) + { + const ImageFormat imageFormat = formatDecoration->getFormat(); + auto textureType = as<IRTextureTypeBase>(arg0->getDataType()); + IRType* elementType = textureType ? textureType->getElementType() : nullptr; + + if (elementType && ! _isImageFormatCompatible(imageFormat, elementType)) + { + // Append _convert on the name to signify we need to use a code path, that will automatically + // do the format conversion. + m_writer->emit("_convert"); + } + } + } + break; + } case 'c': { // When doing texture access in glsl the result may need to be cast. |
