diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-05-15 11:45:58 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-15 11:45:58 -0400 |
| commit | d5e8044d0a9723bb0bbd7ae1738d1157265da783 (patch) | |
| tree | d330e87e67646fd6e978e4debad17b4f7fbe2c40 /source | |
| parent | bfe75618be81566882be8570b8db82ad5a2f8fe4 (diff) | |
Read half->float RWTexture conversion (#1842)
* #include of an absolute path didn't work, because paths were always taken to be relative.
* Fix for writing to RWTexture with half types on CUDA.
* CUDA half functionality doc updates.
* First pass support for sust.p RWTexture format conversion on write.
* Tidy up implementation of $C.
Made the clamping mode configurable via #define.
* A simple test for RWTexture CUDA format conversion.
* Add support for float2 and float4.
* WIP conversion testing.
* Use $E to fix byte addressing in X in CUDA.
* Do not scale when accessing via _convert versions of surface functions.
* Revert to previous test.
* Test with half/float convert write/read.
* More broad half->float read conversion testing.
* Improve documentation around half and RWTexture conversion.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/core.meta.slang | 2 | ||||
| -rw-r--r-- | source/slang/slang-intrinsic-expand.cpp | 86 |
2 files changed, 64 insertions, 24 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 9e5cf80c8..6b73630a3 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -1083,7 +1083,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << (isArray ? "Layered" : ""); - sb << "read<$T0>($0"; + sb << "read$C<$T0>($0"; for (int i = 0; i < vecCount; ++i) { diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index c5bedbc37..bd2e17b28 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -1,6 +1,8 @@ // slang-intrinsic-expand.cpp #include "slang-intrinsic-expand.h" +#include "slang-emit-cuda.h" + namespace Slang { void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const UnownedStringSlice& intrinsicText) @@ -101,13 +103,13 @@ static BaseType _getBaseTypeFromScalarType(SlangScalarType type) // The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more. // But for CUDA and C++ that's not the case or generally desirable. 
-static IRFormatDecoration* _findImageFormatDecoration(IRInst* inst) +static IRFormatDecoration* _findImageFormatDecoration(IRInst* resourceInst) { // JS(TODO): // There could perhaps be other situations, that need to be covered // If this is a load, we need to get the decoration from the field key - if (IRLoad* load = as<IRLoad>(inst)) + if (IRLoad* load = as<IRLoad>(resourceInst)) { if (IRFieldAddress* fieldAddress = as<IRFieldAddress>(load->getOperand(0))) { @@ -116,7 +118,7 @@ static IRFormatDecoration* _findImageFormatDecoration(IRInst* inst) } } // Otherwise just try on the instruction - return inst->findDecoration<IRFormatDecoration>(); + return resourceInst->findDecoration<IRFormatDecoration>(); } // Returns true if dataType and imageFormat are compatible - that they have the same representation, @@ -149,36 +151,26 @@ static bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType) return formatBaseType == baseType; } -static bool _isConvertRequired(ImageFormat imageFormat, IRInst* resourceVar) +static bool _isConvertRequired(ImageFormat imageFormat, IRInst* callee) { - auto textureType = as<IRTextureTypeBase>(resourceVar->getDataType()); + auto textureType = as<IRTextureTypeBase>(callee->getDataType()); IRType* elementType = textureType ? 
textureType->getElementType() : nullptr; return elementType && !_isImageFormatCompatible(imageFormat, elementType); } -static size_t _calcBackingElementSizeInBytes(IRInst* resourceVar) +static size_t _calcBackingElementSizeInBytes(IRInst* resourceInst) { // First see if there is a format associated with the resource - if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceVar)) + if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceInst)) { - const ImageFormat imageFormat = formatDecoration->getFormat(); - - if (_isConvertRequired(imageFormat, resourceVar)) - { - // If the access is a converting access then the x coordinate is *NOT* scaled - // This is a CUDA specific issue(!). - return 1; - } - - const auto& imageFormatInfo = getImageFormatInfo(imageFormat); - return imageFormatInfo.sizeInBytes; + return getImageFormatInfo(formatDecoration->getFormat()).sizeInBytes; } else { // If not we *assume* the backing format is the same as the element type used for access. /// Ie in RWTexture<T>, this would return sizeof(T) - auto textureType = as<IRTextureTypeBase>(resourceVar->getDataType()); + auto textureType = as<IRTextureTypeBase>(resourceInst->getDataType()); IRType* elementType = textureType ? textureType->getElementType() : nullptr; if (elementType) @@ -206,6 +198,18 @@ static size_t _calcBackingElementSizeInBytes(IRInst* resourceVar) return 4; } +static bool _isResourceRead(IRCall* call) +{ + IRType* returnType = call->getDataType(); + return returnType && (as<IRVoidType>(returnType) == nullptr); +} + +static bool _isResourceWrite(IRCall* call) +{ + IRType* returnType = call->getDataType(); + return returnType && (as<IRVoidType>(returnType) != nullptr); +} + const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) { const char*const end = m_text.end(); @@ -323,13 +327,35 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) // writes that will do a format conversion. 
if (m_emitter->getTarget() == CodeGenTarget::CUDASource) { - IRInst* arg0 = m_callInst->getArg(0); + IRInst* resourceInst = m_callInst->getArg(0); - if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0)) + if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceInst)) { const ImageFormat imageFormat = formatDecoration->getFormat(); - if (_isConvertRequired(imageFormat, arg0)) + if (_isConvertRequired(imageFormat, resourceInst)) { + // If the function returns something it's a reader so we may need to convert + // and in doing so require half + if (_isResourceRead(m_callInst)) + { + // If the source format if half derived, then we need to enable half + switch (imageFormat) + { + case ImageFormat::r16f: + case ImageFormat::rg16f: + case ImageFormat::rgba16f: + { + CUDAExtensionTracker* extensionTracker = as<CUDAExtensionTracker>(m_emitter->getExtensionTracker()); + if (extensionTracker) + { + extensionTracker->requireBaseType(BaseType::Half); + } + break; + } + default: break; + } + } + // Append _convert on the name to signify we need to use a code path, that will automatically // do the format conversion. m_writer->emit("_convert"); @@ -344,7 +370,21 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) /// Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface /// access is byte addressed. /// $E will return the byte size of the *backing element*. 
- size_t elemSizeInBytes = _calcBackingElementSizeInBytes(m_callInst->getArg(0)); + + IRInst* resourceInst = m_callInst->getArg(0); + size_t elemSizeInBytes = _calcBackingElementSizeInBytes(resourceInst); + + // If we have a format converstion and its a *write* we don't need to scale + if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceInst)) + { + const ImageFormat imageFormat = formatDecoration->getFormat(); + if (_isConvertRequired(imageFormat, resourceInst) && _isResourceWrite(m_callInst)) + { + // If there is a conversion *and* it's a write we don't need to scale. + elemSizeInBytes = 1; + } + } + SLANG_ASSERT(elemSizeInBytes > 0); m_writer->emitUInt64(UInt64(elemSizeInBytes)); break; |
