summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-05-15 11:22:14 -0400
committerGitHub <noreply@github.com>2021-05-15 11:22:14 -0400
commitbfe75618be81566882be8570b8db82ad5a2f8fe4 (patch)
tree1a319bee77f8faa4c09f385287d2dffdc569499e
parent1027225ac7ec8da0e471b633f358333c8a95b010 (diff)
Surface access on CUDA is byte addressed in X (#1841)
* #include of an absolute path didn't work, because paths were always taken to be relative. * Fix for writing to RWTexture with half types on CUDA. * CUDA half functionality doc updates. * First pass support for sust.p RWTexture format conversion on write. * Tidy up implementation of $C. Made clamping mode #define-able. * A simple test for RWTexture CUDA format conversion. * Use $E to fix byte addressing in X in CUDA. * Do not scale when accessing via _convert versions of surface functions.
-rw-r--r--prelude/slang-cuda-prelude.h3
-rw-r--r--source/slang/core.meta.slang11
-rw-r--r--source/slang/slang-intrinsic-expand.cpp80
3 files changed, 88 insertions, 6 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index a439d274a..a18da027b 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -385,6 +385,9 @@ SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x,
// Support for doing format conversion when writing to a surface/RWTexture
+// NOTE! For normal surface access, x values are *byte* addressed.
+// For the _convert versions they are *not*: sust.p does not require byte addressing in x.
+
template <typename T>
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
template <typename T>
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index c268c2a58..9e5cf80c8 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1091,6 +1091,11 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
if (vecCount > 1)
{
sb << '.' << char(i + 'x');
+ // Surface access is *byte* addressed in x in CUDA
+ if (i == 0)
+ {
+ sb << " * $E";
+ }
}
}
@@ -1140,6 +1145,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
{
sb << '.' << char(i + 'x');
}
+
+ // Surface access is *byte* addressed in x in CUDA
+ if (i == 0)
+ {
+ sb << " * $E";
+ }
}
sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp
index c1e886621..c5bedbc37 100644
--- a/source/slang/slang-intrinsic-expand.cpp
+++ b/source/slang/slang-intrinsic-expand.cpp
@@ -101,7 +101,7 @@ static BaseType _getBaseTypeFromScalarType(SlangScalarType type)
// The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more.
// But for CUDA and C++ that's not the case or generally desirable.
-IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
+static IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
{
// JS(TODO):
// There could perhaps be other situations, that need to be covered
@@ -119,7 +119,9 @@ IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
return inst->findDecoration<IRFormatDecoration>();
}
-bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
+// Returns true if dataType and imageFormat are compatible - that they have the same representation,
+// and no conversion is required.
+static bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
{
int numElems = 1;
@@ -147,6 +149,63 @@ bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
return formatBaseType == baseType;
}
+static bool _isConvertRequired(ImageFormat imageFormat, IRInst* resourceVar) // True when resourceVar's element type is not compatible with imageFormat.
+{
+ auto textureType = as<IRTextureTypeBase>(resourceVar->getDataType()); // Null-checked below: the data type may not be a texture type.
+ IRType* elementType = textureType ? textureType->getElementType() : nullptr;
+ return elementType && !_isImageFormatCompatible(imageFormat, elementType); // Without an element type, no conversion is reported.
+}
+
+static size_t _calcBackingElementSizeInBytes(IRInst* resourceVar) // Size value emitted for $E: the scale factor applied to the x coordinate.
+{
+ // Prefer an explicit format decoration on the resource, if one can be found.
+ if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceVar))
+ {
+ const ImageFormat imageFormat = formatDecoration->getFormat();
+
+ if (_isConvertRequired(imageFormat, resourceVar))
+ {
+ // A converting access does *NOT* scale the x coordinate, so report 1
+ // (multiplying x by 1 leaves it unchanged). This is a CUDA specific issue(!).
+ return 1;
+ }
+
+ const auto& imageFormatInfo = getImageFormatInfo(imageFormat);
+ return imageFormatInfo.sizeInBytes; // Compatible format: use the size recorded for the format itself.
+ }
+ else
+ {
+ // No format decoration: *assume* the backing format is the same as the element type used for access,
+ // i.e. for RWTexture<T> this returns sizeof(T).
+
+ auto textureType = as<IRTextureTypeBase>(resourceVar->getDataType());
+ IRType* elementType = textureType ? textureType->getElementType() : nullptr;
+
+ if (elementType)
+ {
+ int numElems = 1;
+
+ if (auto vecType = as<IRVectorType>(elementType)) // Vector element: total size is component size times component count.
+ {
+ numElems = int(getIntVal(vecType->getElementCount()));
+ elementType = vecType->getElementType();
+ }
+
+ BaseType baseType = BaseType::Void; // NOTE(review): stays Void if elementType is not an IRBasicType - confirm the resulting size is non-zero, since the $E call site asserts > 0.
+ if (auto basicType = as<IRBasicType>(elementType))
+ {
+ baseType = basicType->getBaseType();
+ }
+
+ const auto& info = BaseTypeInfo::getInfo(baseType);
+ return info.sizeInBytes * numElems;
+ }
+ }
+
+ // Fallback when no element type is available: 4 is not a terrible guess based on limitations around DX11 etc
+ return 4;
+}
+
const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
{
const char*const end = m_text.end();
@@ -269,10 +328,7 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0))
{
const ImageFormat imageFormat = formatDecoration->getFormat();
- auto textureType = as<IRTextureTypeBase>(arg0->getDataType());
- IRType* elementType = textureType ? textureType->getElementType() : nullptr;
-
- if (elementType && ! _isImageFormatCompatible(imageFormat, elementType))
+ if (_isConvertRequired(imageFormat, arg0))
{
// Append _convert on the name to signify we need to use a code path, that will automatically
// do the format conversion.
@@ -282,6 +338,18 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
}
break;
}
+
+ case 'E':
+ {
+ /// Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface
+ /// access is byte addressed. $E emits the byte size of the *backing element*, or 1 when
+ /// the access requires format conversion (the _convert surface functions are not byte addressed).
+ size_t elemSizeInBytes = _calcBackingElementSizeInBytes(m_callInst->getArg(0));
+ SLANG_ASSERT(elemSizeInBytes > 0);
+ m_writer->emitUInt64(UInt64(elemSizeInBytes));
+ break;
+ }
+
case 'c':
{
// When doing texture access in glsl the result may need to be cast.