diff options
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 33 | ||||
| -rw-r--r-- | source/slang/core.meta.slang | 2 | ||||
| -rw-r--r-- | source/slang/slang-ast-support-types.h | 12 | ||||
| -rw-r--r-- | source/slang/slang-image-format-defs.h | 80 | ||||
| -rw-r--r-- | source/slang/slang-intrinsic-expand.cpp | 114 | ||||
| -rw-r--r-- | source/slang/slang-intrinsic-expand.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-syntax.cpp | 40 | ||||
| -rw-r--r-- | tests/compute/half-rw-texture-convert.slang | 42 | ||||
| -rw-r--r-- | tests/compute/half-rw-texture-convert.slang.expected.txt | 5 |
9 files changed, 267 insertions, 62 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index a00e8f744..a439d274a 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -63,6 +63,15 @@ #ifndef SLANG_CUDA_BOUNDARY_MODE # define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero + +// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. Only applies to *PTX* emitted CUDA operations +// which currently is just RWTexture format writes +// +// .trap causes an execution trap on out-of-bounds addresses +// .clamp stores data at the nearest surface location (sized appropriately) +// .zero drops stores to out-of-bounds addresses + +# define SLANG_PTX_BOUNDARY_MODE "zero" #endif struct TypeInfo @@ -371,9 +380,31 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer)) SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer)) SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face)) SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace)) - + #endif +// Support for doing format conversion when writing to a surface/RWTexture + +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode); +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode); + +// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.1d.b32." 
SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v)); +} + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v)); +} + // ----------------------------- F32 ----------------------------------------- // Unary diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 934d30c18..c268c2a58 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -1132,7 +1132,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << (isArray ? "Layered" : ""); - sb << "write<$T0>($2, $0"; + sb << "write$C<$T0>($2, $0"; for (int i = 0; i < vecCount; ++i) { sb << ", ($1)"; diff --git a/source/slang/slang-ast-support-types.h b/source/slang/slang-ast-support-types.h index 168d358d6..c172ea83c 100644 --- a/source/slang/slang-ast-support-types.h +++ b/source/slang/slang-ast-support-types.h @@ -122,10 +122,20 @@ namespace Slang enum class ImageFormat { -#define FORMAT(NAME) NAME, +#define FORMAT(NAME, OTHER) NAME, #include "slang-image-format-defs.h" }; + struct ImageFormatInfo + { + SlangScalarType scalarType; ///< If image format is not made up of channels of set sizes this will be SLANG_SCALAR_TYPE_NONE + uint8_t channelCount; ///< The number of channels + uint8_t sizeInBytes; ///< Size in bytes + UnownedStringSlice name; ///< The name associated with this type. NOTE! Currently these names *are* the GLSL format names. 
+ }; + + const ImageFormatInfo& getImageFormatInfo(ImageFormat format); + bool findImageFormatByName(char const* name, ImageFormat* outFormat); char const* getGLSLNameForImageFormat(ImageFormat format); diff --git a/source/slang/slang-image-format-defs.h b/source/slang/slang-image-format-defs.h index aa6ffec50..25dbae16a 100644 --- a/source/slang/slang-image-format-defs.h +++ b/source/slang/slang-image-format-defs.h @@ -3,45 +3,45 @@ #error Must define FORMAT macro before including image-format-defs.h #endif -FORMAT(unknown) -FORMAT(rgba32f) -FORMAT(rgba16f) -FORMAT(rg32f) -FORMAT(rg16f) -FORMAT(r11f_g11f_b10f) -FORMAT(r32f) -FORMAT(r16f) -FORMAT(rgba16) -FORMAT(rgb10_a2) -FORMAT(rgba8) -FORMAT(rg16) -FORMAT(rg8) -FORMAT(r16) -FORMAT(r8) -FORMAT(rgba16_snorm) -FORMAT(rgba8_snorm) -FORMAT(rg16_snorm) -FORMAT(rg8_snorm) -FORMAT(r16_snorm) -FORMAT(r8_snorm) -FORMAT(rgba32i) -FORMAT(rgba16i) -FORMAT(rgba8i) -FORMAT(rg32i) -FORMAT(rg16i) -FORMAT(rg8i) -FORMAT(r32i) -FORMAT(r16i) -FORMAT(r8i) -FORMAT(rgba32ui) -FORMAT(rgba16ui) -FORMAT(rgb10_a2ui) -FORMAT(rgba8ui) -FORMAT(rg32ui) -FORMAT(rg16ui) -FORMAT(rg8ui) -FORMAT(r32ui) -FORMAT(r16ui) -FORMAT(r8ui) +FORMAT(unknown, (NONE, 0, 0)) +FORMAT(rgba32f, (FLOAT32, 4, sizeof(float) * 4)) +FORMAT(rgba16f, (FLOAT16, 4, sizeof(uint16_t) * 4)) +FORMAT(rg32f, (FLOAT32, 2, sizeof(float) * 2)) +FORMAT(rg16f, (FLOAT16, 2, sizeof(uint16_t) * 2)) +FORMAT(r11f_g11f_b10f, (NONE, 3, sizeof(uint32_t))) +FORMAT(r32f, (FLOAT32, 1, sizeof(float))) +FORMAT(r16f, (FLOAT16, 1, sizeof(uint16_t))) +FORMAT(rgba16, (UINT16, 4, sizeof(uint16_t) * 4)) +FORMAT(rgb10_a2, (NONE, 4, sizeof(uint32_t))) +FORMAT(rgba8, (UINT8, 4, sizeof(uint32_t))) +FORMAT(rg16, (UINT16, 2, sizeof(uint16_t) * 2 )) +FORMAT(rg8, (UINT8, 2, sizeof(char) * 2)) +FORMAT(r16, (UINT16, 1, sizeof(uint16_t))) +FORMAT(r8, (UINT8, 1, sizeof(uint8_t))) +FORMAT(rgba16_snorm, (UINT16, 4, sizeof(uint16_t) * 4)) +FORMAT(rgba8_snorm, (UINT8, 4, sizeof(uint8_t) * 4)) +FORMAT(rg16_snorm, 
(UINT16, 2, sizeof(uint16_t) * 2)) +FORMAT(rg8_snorm, (UINT8, 2, sizeof(uint8_t) * 2)) +FORMAT(r16_snorm, (UINT16, 1, sizeof(uint16_t))) +FORMAT(r8_snorm, (UINT8, 1, sizeof(uint8_t))) +FORMAT(rgba32i, (INT32, 4, sizeof(int32_t) * 4)) +FORMAT(rgba16i, (INT16, 4, sizeof(int16_t) * 4)) +FORMAT(rgba8i, (INT8, 4, sizeof(int8_t) * 4)) +FORMAT(rg32i, (INT32, 2, sizeof(int32_t) * 2)) +FORMAT(rg16i, (INT16, 2, sizeof(int16_t) * 2)) +FORMAT(rg8i, (INT8, 2, sizeof(int8_t) * 2)) +FORMAT(r32i, (INT32, 1, sizeof(int32_t))) +FORMAT(r16i, (INT16, 1, sizeof(int16_t))) +FORMAT(r8i, (INT8, 1, sizeof(int8_t))) +FORMAT(rgba32ui, (UINT32, 4, sizeof(uint32_t) * 4)) +FORMAT(rgba16ui, (UINT16, 4, sizeof(uint16_t) * 4)) +FORMAT(rgb10_a2ui, (NONE, 4, sizeof(uint32_t))) +FORMAT(rgba8ui, (UINT8, 4, sizeof(uint8_t) * 4)) +FORMAT(rg32ui, (UINT32, 2, sizeof(uint32_t) * 2)) +FORMAT(rg16ui, (UINT16, 2, sizeof(uint16_t) * 2)) +FORMAT(rg8ui, (UINT8, 2, sizeof(uint8_t) * 2)) +FORMAT(r32ui, (UINT32, 1, sizeof(uint32_t))) +FORMAT(r16ui, (UINT16, 1, sizeof(uint16_t))) +FORMAT(r8ui, (UINT8, 1, sizeof(uint8_t))) #undef FORMAT diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index c6cf65e5a..c1e886621 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -8,6 +8,7 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const m_args = args; m_argCount = argCount; m_text = intrinsicText; + m_callInst = inst; const auto returnType = inst->getDataType(); @@ -59,6 +60,93 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const } } +static BaseType _getBaseTypeFromScalarType(SlangScalarType type) +{ + switch (type) + { + case SLANG_SCALAR_TYPE_INT32: return BaseType::Int; + case SLANG_SCALAR_TYPE_UINT32: return BaseType::UInt; + case SLANG_SCALAR_TYPE_INT16: return BaseType::Int16; + case SLANG_SCALAR_TYPE_UINT16: return BaseType::UInt16; + case SLANG_SCALAR_TYPE_INT64: 
return BaseType::Int64; + case SLANG_SCALAR_TYPE_UINT64: return BaseType::UInt64; + case SLANG_SCALAR_TYPE_INT8: return BaseType::Int8; + case SLANG_SCALAR_TYPE_UINT8: return BaseType::UInt8; + case SLANG_SCALAR_TYPE_FLOAT16: return BaseType::Half; + case SLANG_SCALAR_TYPE_FLOAT32: return BaseType::Float; + case SLANG_SCALAR_TYPE_FLOAT64: return BaseType::Double; + case SLANG_SCALAR_TYPE_BOOL: return BaseType::Bool; + default: return BaseType::Void; + } +} + +// TODO(JS): There is an inherent problem here: +// +// TimF: The big gotcha you'd have with trying to look up the IRVar or whatever from an intrinsic is that it is very easy for the user to "smuggle" a resource-type value through an intermediate function: +// +// ``` +// Imagine this is user code... +// void f(RWTexture2D t) { t.YourOpThatYouAdded(...); }[attributeYouCareAbout(...)] +// RWTexture2D gTex; +// ... +// f(gTex); +// +// ``` +// +// So when emitting IR code for f, there is no way to trace t back to gTex and get at[attributeYouCareAbout(...)] +// Structurally, you can get back to the IRParam for t and that's it. +// And even if there was some magic way to trace back through the call site, you would run into the problem that some call sites +// might call f(gTex) and other might call f(gSomeOtherTex) and there is no guarantee the attributes on those two textures would match. +// +// The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more. +// But for CUDA and C++ that's not the case or generally desirable. 
+ +IRFormatDecoration* _findImageFormatDecoration(IRInst* inst) +{ + // JS(TODO): + // There could perhaps be other situations, that need to be covered + + // If this is a load, we need to get the decoration from the field key + if (IRLoad* load = as<IRLoad>(inst)) + { + if (IRFieldAddress* fieldAddress = as<IRFieldAddress>(load->getOperand(0))) + { + IRInst* field = fieldAddress->getField(); + return field->findDecoration<IRFormatDecoration>(); + } + } + // Otherwise just try on the instruction + return inst->findDecoration<IRFormatDecoration>(); +} + +bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType) +{ + int numElems = 1; + + if (auto vecType = as<IRVectorType>(dataType)) + { + numElems = int(getIntVal(vecType->getElementCount())); + dataType = vecType->getElementType(); + } + + BaseType baseType = BaseType::Void; + if (auto basicType = as<IRBasicType>(dataType)) + { + baseType = basicType->getBaseType(); + } + + const auto& imageFormatInfo = getImageFormatInfo(imageFormat); + const BaseType formatBaseType = _getBaseTypeFromScalarType(imageFormatInfo.scalarType); + + if (numElems != imageFormatInfo.channelCount) + { + SLANG_ASSERT(!"Format doesn't match channel count"); + return false; + } + + return formatBaseType == baseType; +} + const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) { const char*const end = m_text.end(); @@ -168,6 +256,32 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } break; + case 'C': + { + // The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format + // conversion required between the type associated by the resource and the backing ImageFormat. + // Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture + // writes that will do a format conversion. 
+ if (m_emitter->getTarget() == CodeGenTarget::CUDASource) + { + IRInst* arg0 = m_callInst->getArg(0); + + if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0)) + { + const ImageFormat imageFormat = formatDecoration->getFormat(); + auto textureType = as<IRTextureTypeBase>(arg0->getDataType()); + IRType* elementType = textureType ? textureType->getElementType() : nullptr; + + if (elementType && ! _isImageFormatCompatible(imageFormat, elementType)) + { + // Append _convert on the name to signify we need to use a code path, that will automatically + // do the format conversion. + m_writer->emit("_convert"); + } + } + } + break; + } case 'c': { // When doing texture access in glsl the result may need to be cast. diff --git a/source/slang/slang-intrinsic-expand.h b/source/slang/slang-intrinsic-expand.h index 468e1f80a..b52e3e8f5 100644 --- a/source/slang/slang-intrinsic-expand.h +++ b/source/slang/slang-intrinsic-expand.h @@ -24,6 +24,7 @@ protected: SourceWriter* m_writer; UnownedStringSlice m_text; + IRCall* m_callInst; IRUse* m_args = nullptr; Int m_argCount = 0; Index m_openParenCount = 0; diff --git a/source/slang/slang-syntax.cpp b/source/slang/slang-syntax.cpp index d5f2c13db..6b2140056 100644 --- a/source/slang/slang-syntax.cpp +++ b/source/slang/slang-syntax.cpp @@ -1186,38 +1186,40 @@ Module* getModule(Decl* decl) return moduleDecl->module; } -bool findImageFormatByName(char const* name, ImageFormat* outFormat) +static const ImageFormatInfo kImageFormatInfos[] = { - static const struct - { - char const* name; - ImageFormat format; - } kFormats[] = - { -#define FORMAT(NAME) { #NAME, ImageFormat::NAME }, +#define SLANG_IMAGE_FORMAT_INFO(TYPE, COUNT, SIZE) SLANG_SCALAR_TYPE_##TYPE, uint8_t(COUNT), uint8_t(SIZE) +#define FORMAT(NAME, OTHER) \ + { SLANG_IMAGE_FORMAT_INFO OTHER, UnownedStringSlice::fromLiteral(#NAME) }, #include "slang-image-format-defs.h" - }; +#undef FORMAT +#undef SLANG_IMAGE_FORMAT_INFO +}; - for( auto item : kFormats ) 
+bool findImageFormatByName(char const* inName, ImageFormat* outFormat) +{ + const UnownedStringSlice name(inName); + + for (Index i = 0; i < SLANG_COUNT_OF(kImageFormatInfos); ++i) { - if( strcmp(item.name, name) == 0 ) + const auto& info = kImageFormatInfos[i]; + if (info.name == name) { - *outFormat = item.format; + *outFormat = ImageFormat(i); return true; } } - return false; } char const* getGLSLNameForImageFormat(ImageFormat format) { - switch( format ) - { - default: return "unhandled"; -#define FORMAT(NAME) case ImageFormat::NAME: return #NAME; -#include "slang-image-format-defs.h" - } + return kImageFormatInfos[Index(format)].name.begin(); } + const ImageFormatInfo& getImageFormatInfo(ImageFormat format) + { + return kImageFormatInfos[Index(format)]; + } + } // namespace Slang diff --git a/tests/compute/half-rw-texture-convert.slang b/tests/compute/half-rw-texture-convert.slang new file mode 100644 index 000000000..cf6eea4ea --- /dev/null +++ b/tests/compute/half-rw-texture-convert.slang @@ -0,0 +1,42 @@ + + +// Native half not supported on CPU currently +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj +// Doesn't work on DX11 currently - locks up on binding +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -shaderobj +// Produces a different result on DX12 with DXBC than expected(!). So disabled for now +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -output-using-type -shaderobj +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -output-using-type -shaderobj +// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer +//DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj + +// Note that this test is a little silly. The output does not confirm that the write actually worked. +// This is because it's not trivial on CUDA to do so. 
If there is a write conversion, the RWTexture +// is backed by a surface. There is a texture converting write (in sust.p) but not in reading. +// +// In practice if you wanted a texture read, you'd either only be able to read *without* format +// conversion, or via a separate binding of the same surface as a Texture. +// There's no simple way to describe either, so this test just confirms it outputs PTX that can +// be executed, and unfortunately doesn't test if the write conversion actually *worked* + +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half + +//TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D +[format("r16f")] +RWTexture2D<float> rwt2D; + +//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<float> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + + float val = idx; + + // Do a format converting write! + rwt2D[uint2(idx, idx)] = val; + + outputBuffer[idx] = val; +} diff --git a/tests/compute/half-rw-texture-convert.slang.expected.txt b/tests/compute/half-rw-texture-convert.slang.expected.txt new file mode 100644 index 000000000..8867afc5a --- /dev/null +++ b/tests/compute/half-rw-texture-convert.slang.expected.txt @@ -0,0 +1,5 @@ +type: float +0.000000 +1.000000 +2.000000 +3.000000 |
