diff options
35 files changed, 2399 insertions, 1474 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj index a7359e001..f5d28d800 100644 --- a/build/visual-studio/slang/slang.vcxproj +++ b/build/visual-studio/slang/slang.vcxproj @@ -404,6 +404,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-mesh-outputs.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-legalize-uniform-buffer-load.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-legalize-varying-params.h" />
+ <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-vector-types.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-link.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-liveness.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-loop-inversion.h" />
@@ -505,6 +506,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClInclude Include="..\..\..\source\slang\slang-serialize-value-type-info.h" />
<ClInclude Include="..\..\..\source\slang\slang-serialize.h" />
<ClInclude Include="..\..\..\source\slang\slang-spirv-val.h" />
+ <ClInclude Include="..\..\..\source\slang\slang-stdlib-textures.h" />
<ClInclude Include="..\..\..\source\slang\slang-syntax.h" />
<ClInclude Include="..\..\..\source\slang\slang-type-layout.h" />
<ClInclude Include="..\..\..\source\slang\slang-type-system-shared.h" />
@@ -616,6 +618,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-types.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-legalize-uniform-buffer-load.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-legalize-varying-params.cpp" />
+ <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-vector-types.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-link.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-liveness.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-loop-inversion.cpp" />
@@ -715,6 +718,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClCompile Include="..\..\..\source\slang\slang-spirv-core-grammar-embed.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-spirv-val.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-stdlib-api.cpp" />
+ <ClCompile Include="..\..\..\source\slang\slang-stdlib-textures.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-stdlib.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-syntax.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-type-layout.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters index 3e89626be..193358c37 100644 --- a/build/visual-studio/slang/slang.vcxproj.filters +++ b/build/visual-studio/slang/slang.vcxproj.filters @@ -300,6 +300,9 @@ <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-varying-params.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-vector-types.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\..\..\source\slang\slang-ir-link.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -603,6 +606,9 @@ <ClInclude Include="..\..\..\source\slang\slang-spirv-val.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\..\..\source\slang\slang-stdlib-textures.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\..\..\source\slang\slang-syntax.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -932,6 +938,9 @@ <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-varying-params.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-vector-types.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\source\slang\slang-ir-link.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@@ -1229,6 +1238,9 @@ <ClCompile Include="..\..\..\source\slang\slang-stdlib-api.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\source\slang\slang-stdlib-textures.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\source\slang\slang-stdlib.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 96e6d284a..956a5b29a 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -1065,8 +1065,6 @@ __generic<T> __extension vector<T, 4> ${{{{ -static const char* kComponentNames[]{ "x", "y", "z", "w" }; - // The above extensions are generic in the *type* of the vector, // but explicit in the *size*. We will now declare an extension // for each builtin type that is generic in the size. @@ -1256,1054 +1254,6 @@ struct SamplerComparisonState ${{{{ -static const struct BaseTextureShapeInfo { - char const* shapeName; - TextureFlavor::Shape baseShape; - int coordCount; -} kBaseTextureShapes[] = { - { "1D", TextureFlavor::Shape::Shape1D, 1 }, - { "2D", TextureFlavor::Shape::Shape2D, 2 }, - { "3D", TextureFlavor::Shape::Shape3D, 3 }, - { "Cube", TextureFlavor::Shape::ShapeCube,3 }, -}; - -static const struct BaseTextureAccessInfo { - char const* name; - SlangResourceAccess access; -} kBaseTextureAccessLevels[] = { - { "", SLANG_RESOURCE_ACCESS_READ }, - { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE }, - { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED }, -}; - -static const struct TextureTypePrefixInfo -{ - char const* name; - bool combined; -} kTexturePrefixes[] = -{ - { "Texture", false }, - { "Sampler", true }, -}; - -struct TextureTypeInfo -{ - TextureTypeInfo( - TextureTypePrefixInfo const& prefixInfo, - BaseTextureShapeInfo const& base, - bool isArray, - bool isMultisample, - BaseTextureAccessInfo const& accessInfo, - StringBuilder& inSB, - String const& inPath) - : prefixInfo(prefixInfo) - , base(base) - , isArray(isArray) - , isMultisample(isMultisample) - , accessInfo(accessInfo) - , sb(inSB) - , path(inPath) - { - } - - TextureTypePrefixInfo const& prefixInfo; - BaseTextureShapeInfo const& base; - bool isArray; - bool isMultisample; - BaseTextureAccessInfo const& accessInfo; - StringBuilder& sb; - String path; - - void emitTypeDecl() - { - char const* 
baseName = prefixInfo.name; - char const* baseShapeName = base.shapeName; - TextureFlavor::Shape baseShape = base.baseShape; - - // Arrays of 3D textures aren't allowed - if (isArray && baseShape == TextureFlavor::Shape::Shape3D) return; - - auto access = accessInfo.access; - - // No such thing as RWTextureCube - if (access == SLANG_RESOURCE_ACCESS_READ_WRITE && baseShape == TextureFlavor::Shape::ShapeCube) - { - return; - } - - bool isReadOnly = (access == SLANG_RESOURCE_ACCESS_READ); - // TODO: any constraints to enforce on what gets to be multisampled? - - unsigned flavor = baseShape; - if (isArray) flavor |= TextureFlavor::ArrayFlag; - if (isMultisample) flavor |= TextureFlavor::MultisampleFlag; -// if (isShadow) flavor |= TextureFlavor::ShadowFlag; - - flavor |= (access << 8); - - // emit a generic signature - sb << "__generic<T = float4"; - // Multi-sample rw texture types have an optional sampleCount parameter. - if (isMultisample) - sb << ", let sampleCount : int = 0"; - sb << ">"; - - if(prefixInfo.combined) - { - sb << "__magic_type(TextureSamplerType," << int(flavor) << ")\n"; - sb << "__intrinsic_type(" << (kIROp_TextureSamplerType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; - } - else - { - sb << "__magic_type(TextureType," << int(flavor) << ")\n"; - sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; - } - sb << "struct "; - sb << accessInfo.name; - sb << baseName; - sb << baseShapeName; - if (isMultisample) sb << "MS"; - if (isArray) sb << "Array"; -// if (isShadow) sb << "Shadow"; - sb << "\n{"; - - char const* samplerStateParam = prefixInfo.combined ? 
"" : "SamplerState s, "; - - if( !isMultisample ) - { - sb << "__target_intrinsic(glsl, \"textureQueryLod($p, $2).x\")"; - sb << "float CalculateLevelOfDetail(" << samplerStateParam; - sb << "float" << base.coordCount << " location);\n"; - - sb << "__target_intrinsic(glsl, \"textureQueryLod($p, $2).y\")"; - sb << "float CalculateLevelOfDetailUnclamped(" << samplerStateParam; - sb << "float" << base.coordCount << " location);\n"; - } - - // `GetDimensions` - const char* dimParamTypes[] = {"out float ", "out int ", "out uint "}; - for(auto t : dimParamTypes) - for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo) - { - { - sb << "__glsl_version(450)\n"; - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"("; - - int aa = 1; - String lodStr = ", 0"; - if (includeMipInfo) - { - int mipLevelArg = aa++; - lodStr = ", int($"; - lodStr.append(mipLevelArg); - lodStr.append(")"); - } - - String opStr = " = textureSize($0" + lodStr; - switch( access ) - { - case SLANG_RESOURCE_ACCESS_READ_WRITE: - case SLANG_RESOURCE_ACCESS_RASTER_ORDERED: - opStr = " = imageSize($0"; - break; - - default: - break; - } - - - int cc = 0; - switch(baseShape) - { - case TextureFlavor::Shape::Shape1D: - sb << "($" << aa++ << opStr << ")"; - if (isArray) - { - sb << ".x"; - } - sb << ")"; - cc = 1; - break; - - case TextureFlavor::Shape::Shape2D: - case TextureFlavor::Shape::ShapeCube: - sb << "($" << aa++ << opStr << ").x)"; - sb << ", ($" << aa++ << opStr << ").y)"; - cc = 2; - break; - - case TextureFlavor::Shape::Shape3D: - sb << "($" << aa++ << opStr << ").x)"; - sb << ", ($" << aa++ << opStr << ").y)"; - sb << ", ($" << aa++ << opStr << ").z)"; - cc = 3; - break; - - default: - SLANG_UNEXPECTED("unhandled resource shape"); - break; - } - - if(isArray) - { - sb << ", ($" << aa++ << opStr << ")." 
<< kComponentNames[cc] << ")"; - } - - if(isMultisample) - { - sb << ", ($" << aa++ << " = textureSamples($0))"; - } - - if (includeMipInfo) - { - sb << ", ($" << aa++ << " = textureQueryLevels($0))"; - } - - - sb << ")\")\n"; - } - - sb << "[__readNone]\n"; - sb << "void GetDimensions("; - if(includeMipInfo) - sb << "uint mipLevel, "; - - switch(baseShape) - { - case TextureFlavor::Shape::Shape1D: - sb << t << "width"; - break; - - case TextureFlavor::Shape::Shape2D: - case TextureFlavor::Shape::ShapeCube: - sb << t << "width,"; - sb << t << "height"; - break; - - case TextureFlavor::Shape::Shape3D: - sb << t << "width,"; - sb << t << "height,"; - sb << t << "depth"; - break; - - default: - assert(!"unexpected"); - break; - } - - if(isArray) - { - sb << ", " << t << "elements"; - } - - if(isMultisample) - { - sb << ", " << t << "sampleCount"; - } - - if(includeMipInfo) - sb << ", " << t << "numberOfLevels"; - - sb << ");\n"; - } - - // `GetSamplePosition()` - if( isMultisample ) - { - sb << "float2 GetSamplePosition(int s);\n"; - } - - // `Load()` - - if( base.coordCount + isArray < 4 ) - { - // The `Load()` operation on an ordinary `Texture2D` takes - // an `int3` for the location, where `.xy` holds the texel - // coordinates, and `.z` holds the mip level to use. - // - // The third coordinate for mip level is absent in - // `Texure2DMS.Load()` and `RWTexture2D.Load`. This pattern - // is repreated for all the other texture shapes. - // - bool needsMipLevel = !isMultisample && (access == SLANG_RESOURCE_ACCESS_READ); - - int loadCoordCount = base.coordCount + isArray + (needsMipLevel?1:0); - - char const* glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetch" : "imageLoad"; - - // When translating to GLSL, we need to break apart the `location` argument. - // - // TODO: this should realy be handled by having this member actually get lowered! 
- static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" }; - static const char* kGLSLLoadLODSwizzle[] = { "", "", "y", "z", "w", "error" }; - - // TODO: The GLSL translations here only handle the read-only texture - // cases (stuff that lowers to `texture*` in GLSL) and not the stuff - // that lowers to `image*`. - // - // At some point it may make sense to separate the read-only and - // `RW`/`RasterizerOrdered` cases here rather than try to share code. - - if (isMultisample) - { - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, $1, $2)$z\")\n"; - } - else - { - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, "; - if( needsMipLevel ) - { - sb << "($1)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($1)." << kGLSLLoadLODSwizzle[loadCoordCount]; - } - else - { - sb << "$1"; - } - sb << ")$z\")\n"; - - } - - // CUDA - if (isMultisample) - { - } - else - { - if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) - { - const int coordCount = base.coordCount; - const int vecCount = coordCount + int(isArray); - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D"; - if (isArray) - { - sb << "Layered"; - } - sb << "read"; - sb << "<$T0>($0"; - for (int i = 0; i < coordCount; ++i) - { - sb << ", ($1)"; - if (vecCount > 1) - { - sb << '.' << char(i + 'x'); - } - - // Surface access is *byte* addressed in x in CUDA - if (i == 0) - { - sb << " * $E"; - } - } - if (isArray) - { - sb << ", int(($1)." 
<< char(coordCount + 'x') << ")"; - } - sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; - } - else - { - sb << "__target_intrinsic(cuda, \"surfCubemap"; - if (isArray) - { - sb << "Layered"; - } - sb << "read"; - - // Surface access is *byte* addressed in x in CUDA - sb << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z"; - if (isArray) - { - sb << ", int(($1).w)"; - } - sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; - } - } - else if (access == SLANG_RESOURCE_ACCESS_READ) - { - // We can allow this on Texture1D - if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) - { - sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n"; - } - } - } - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Load("; - sb << "int" << loadCoordCount << " location"; - if(isMultisample) - { - sb << ", int sampleIndex"; - } - sb << ");\n"; - - - // GLSL - glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetchOffset" : "imageLoad"; - if (isMultisample) - { - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, $0, $1, $2)$z\")\n"; - } - else - { - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, "; - if( needsMipLevel ) - { - sb << "($1)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($1)." 
<< kGLSLLoadLODSwizzle[loadCoordCount]; - sb << ", $2)$z\")\n"; - } - else - { - sb << "$1, 0, $2)$z\")\n"; - } - } - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Load("; - sb << "int" << loadCoordCount << " location"; - if(isMultisample) - { - sb << ", int sampleIndex"; - } - sb << ", constexpr int" << base.coordCount << " offset"; - sb << ");\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Load("; - sb << "int" << loadCoordCount << " location"; - if(isMultisample) - { - sb << ", int sampleIndex"; - } - sb << ", constexpr int" << base.coordCount << " offset"; - sb << ", out uint status"; - sb << ");\n"; - } - - if(baseShape != TextureFlavor::Shape::ShapeCube) - { - int N = base.coordCount + isArray; - - char const* uintNs[] = { "", "uint", "uint2", "uint3", "uint4" }; - char const* ivecNs[] = { "", "int", "ivec2", "ivec3", "ivec4" }; - - auto uintN = uintNs[N]; - auto ivecN = ivecNs[N]; - - // subscript operator - sb << "__subscript(" << uintN << " location) -> T {\n"; - - // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!! - - // GLSL/SPIR-V distinguished sampled vs. non-sampled images - { - switch( access ) - { - case SLANG_RESOURCE_ACCESS_NONE: - case SLANG_RESOURCE_ACCESS_READ: - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)"; - if( !isMultisample ) - { - sb << ", 0"; - } - else - { - // TODO: how to handle passing through sample index? - sb << ", 0"; - } - break; - - default: - sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)"; - if( isMultisample ) - { - // TODO: how to handle passing through sample index? 
- sb << ", 0"; - } - break; - } - sb << ")$z\")\n"; - } - - // CUDA - { - if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) - { - const int coordCount = base.coordCount; - const int vecCount = coordCount + int(isArray); - - sb << "__target_intrinsic(cuda, \"surf"; - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << coordCount << "D"; - } - else - { - sb << "Cubemap"; - } - - sb << (isArray ? "Layered" : ""); - sb << "read$C<$T0>($0"; - - for (int i = 0; i < vecCount; ++i) - { - sb << ", ($1)"; - if (vecCount > 1) - { - sb << '.' << char(i + 'x'); - } - // Surface access is *byte* addressed in x in CUDA - if (i == 0) - { - sb << " * $E"; - } - } - - sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; - } - else if (access == SLANG_RESOURCE_ACCESS_READ) - { - // We can allow this on Texture1D - if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) - { - sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, $1)\")\n"; - } - } - } - - // Output that has get - if (isReadOnly) - sb << "[__readNone]\n"; - sb << " get;\n"; - - // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!! - - if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ)) - { - // GLSL - sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\")\n"; - - // CUDA - { - const int coordCount = base.coordCount; - const int vecCount = coordCount + int(isArray); - - sb << "__target_intrinsic(cuda, \"surf"; - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << coordCount << "D"; - } - else - { - sb << "Cubemap"; - } - - sb << (isArray ? "Layered" : ""); - sb << "write$C<$T0>($2, $0"; - for (int i = 0; i < vecCount; ++i) - { - sb << ", ($1)"; - if (vecCount > 1) - { - sb << '.' << char(i + 'x'); - } - - // Surface access is *byte* addressed in x in CUDA - if (i == 0) - { - sb << " * $E"; - } - } - - sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; - } - - // Set - sb << " [nonmutating] set;\n"; - } - - // !!!!!!!!!!!!!!!!!! 
ref !!!!!!!!!!!!!!!!!!!!!!!!! - - // Depending on the access level of the texture type, - // we either have just a getter (the default), or both - // a getter and setter. - switch( access ) - { - case SLANG_RESOURCE_ACCESS_NONE: - case SLANG_RESOURCE_ACCESS_READ: - break; - default: - sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n"; - break; - } - - sb << "}\n"; - } - - if( !isMultisample ) - { - // `Sample()` - - sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n"; - - // CUDA - { - const int coordCount = base.coordCount; - const int vecCount = coordCount + int(isArray); - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; - if (isArray) - { - sb << "Layered"; - } - sb << "<$T0>($0"; - for (int i = 0; i < coordCount; ++i) - { - sb << ", ($2)"; - if (vecCount > 1) - { - sb << '.' << char(i + 'x'); - } - } - if (isArray) - { - sb << ", int(($2)." << char(coordCount + 'x') << ")"; - } - sb << ")\")\n"; - } - else - { - sb << "__target_intrinsic(cuda, \"texCubemap"; - if (isArray) - { - sb << "Layered"; - } - sb << "<$T0>($0, ($2).x, ($2).y, ($2).z"; - if (isArray) - { - sb << ", int(($2).w)"; - } - sb << ")\")\n"; - } - } - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Sample(" << samplerStateParam;; - sb << "float" << base.coordCount + isArray << " location);\n"; - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(glsl, \"$ctextureOffset($p, $2, $3)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Sample(" << samplerStateParam;; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - } - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Sample(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "constexpr int" << base.coordCount << " offset, 
"; - } - sb << "float clamp);\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T Sample(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "constexpr int" << base.coordCount << " offset, "; - } - sb << "float clamp, out uint status);\n"; - - // `SampleBias()` - sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleBias(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, float bias);\n"; - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(glsl, \"$ctextureOffset($p, $2, $3, $4)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleBias(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, float bias, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - } - int baseCoordCount = base.coordCount; - int arrCoordCount = baseCoordCount + isArray; - if (arrCoordCount <= 3) - { - // `SampleCmp()` and `SampleCmpLevelZero` - sb << "__target_intrinsic(glsl, \"texture($p, vec" << arrCoordCount + 1 << "($2, $3))\")"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "float SampleCmp(SamplerComparisonState s, "; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float compareValue"; - sb << ");\n"; - sb << "__target_intrinsic(glsl, \"textureLod($p, vec" << arrCoordCount + 1 << "($2, $3), 0)\")"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "float SampleCmpLevelZero(SamplerComparisonState s, "; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float compareValue"; - sb << ");\n"; - } - if (arrCoordCount < 3) - { - int extCoordCount = arrCoordCount + 1; - - if (extCoordCount < 3) - extCoordCount = 3; - - sb << "__target_intrinsic(glsl, \"$ctextureLod($p, "; - - sb << "vec" << extCoordCount << "($2,"; - for (int ii = 
arrCoordCount; ii < extCoordCount - 1; ++ii) - { - sb << " 0.0,"; - } - sb << "$3)"; - - sb << ", 0.0)$z\")\n"; - } - else if(arrCoordCount <= 3) - { - int extCoordCount = arrCoordCount + 1; - - if (extCoordCount < 3) - extCoordCount = 3; - - sb << "__target_intrinsic(glsl, \"$ctextureGrad($p, "; - - sb << "vec" << extCoordCount << "($2,"; - for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii) - { - sb << " 0.0,"; - } - sb << "$3)"; - - // Construct gradients - sb << ", vec" << baseCoordCount << "(0.0)"; - sb << ", vec" << baseCoordCount << "(0.0)"; - sb << ")$z\")\n"; - } - - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - // Note(tfoley): MSDN seems confused, and claims that the `offset` - // parameter for `SampleCmp` is available for everything but 3D - // textures, while `Sample` and `SampleBias` are consistent in - // saying they only exclude `offset` for cube maps (which makes - // sense). I'm going to assume the documentation for `SampleCmp` - // is just wrong. - sb << "__target_intrinsic(glsl, \"textureOffset($p, vec" << arrCoordCount + 1 << "($2, $3), $4)\")"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "float SampleCmp(SamplerComparisonState s, "; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float compareValue, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - - sb << "__target_intrinsic(glsl, \"textureLodOffset($p, vec" << arrCoordCount + 1 << "($2, $3), 0, $4)\")"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "float SampleCmpLevelZero(SamplerComparisonState s, "; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float compareValue, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - } - - // TODO(JS): Not clear how to map this to CUDA, because in HLSL, the gradient is a vector based on - // the dimension. On CUDA there is texNDGrad, but it always just takes ddx, ddy. - // I could just assume 0 for elements not supplied, and ignore z. 
For now will just leave - sb << "__target_intrinsic(glsl, \"$ctextureGrad($p, $2, $3, $4)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleGrad(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float" << base.coordCount << " gradX, "; - sb << "float" << base.coordCount << " gradY"; - sb << ");\n"; - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(glsl, \"$ctextureGradOffset($p, $2, $3, $4, $5)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleGrad(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float" << base.coordCount << " gradX, "; - sb << "float" << base.coordCount << " gradY, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - - sb << "__glsl_extension(GL_ARB_sparse_texture_clamp)"; - sb << "__target_intrinsic(glsl, \"$ctextureGradOffsetClampARB($p, $2, $3, $4, $5, $6)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleGrad(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float" << base.coordCount << " gradX, "; - sb << "float" << base.coordCount << " gradY, "; - sb << "constexpr int" << base.coordCount << " offset, "; - sb << "float lodClamp);\n"; - - } - - // `SampleLevel` - - sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n"; - - // SPIR-V - { - // TODO: - // Need to: - // - Construct sampled image type OpTypeSampledImage of image type - // - Construct OpSampledImage from image and sampler - // - Call OpImageSampleExplicitLod - // test ./tests/compute/texture-simpler.slang - } - - // CUDA - { - const int coordCount = base.coordCount; - const int vecCount = coordCount + int(isArray); - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; - if (isArray) - { - sb << "Layered"; - } - sb << "Lod<$T0>($0"; - for (int i = 0; i < 
coordCount; ++i) - { - sb << ", ($2)"; - if (vecCount > 1) - { - sb << '.' << char(i + 'x'); - } - } - if (isArray) - { - sb << ", int(($2)." << char(coordCount + 'x') << ")"; - } - sb << ", $3)\")\n"; - } - else - { - sb << "__target_intrinsic(cuda, \"texCubemap"; - if (isArray) - { - sb << "Layered"; - } - sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z"; - if (isArray) - { - sb << ", int(($2).w)"; - } - sb << ", $3)\")\n"; - } - } - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleLevel(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float level);\n"; - - if( baseShape != TextureFlavor::Shape::ShapeCube ) - { - sb << "__target_intrinsic(glsl, \"$ctextureLodOffset($p, $2, $3, $4)$z\")\n"; - if (isReadOnly) - sb << "[__readNone]\n"; - sb << "T SampleLevel(" << samplerStateParam; - sb << "float" << base.coordCount + isArray << " location, "; - sb << "float level, "; - sb << "constexpr int" << base.coordCount << " offset);\n"; - } - } - - sb << "\n};\n"; - - // `Gather*()` operations are handled via an `extension` declaration, - // because this lets us capture the element type of the texture. - // - // TODO: longer-term there should be something like a `TextureElementType` - // interface, that both scalars and vectors implement, that then exposes - // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`. - // - static const struct { - char const* genericPrefix; - char const* elementType; - char const* outputType; - } kGatherExtensionCases[] = { - { "__generic<T, let N : int>", "vector<T,N>", "vector<T, 4>" }, - { "", "float", "vector<float, 4>" }, - { "", "int" , "vector<int, 4>"}, - { "", "uint", "vector<uint, 4>"}, - - // TODO: need a case here for scalars `T`, but also - // need to ensure that case doesn't accidentally match - // for `T = vector<...>`, which requires actual checking - // of constraints on generic parameters. 
- }; - for(auto cc : kGatherExtensionCases) - { - // TODO: this should really be an `if` around the entire `Gather` logic - if (isMultisample) break; - - EMIT_LINE_DIRECTIVE(); - sb << cc.genericPrefix << " __extension "; - sb << accessInfo.name; - sb << baseName; - sb << baseShapeName; - if (isArray) sb << "Array"; - sb << "<" << cc.elementType << " >"; - sb << "\n{\n"; - - // `Gather` - // (tricky because it returns a 4-vector of the element type - // of the texture components...) - // - // TODO: is it actually correct to restrict these so that, e.g., - // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because - // it nominally doesn't have an alpha component? - static const struct { - int componentIndex; - char const* componentName; - } kGatherComponets[] = { - { 0, "" }, - { 0, "Red" }, - { 1, "Green" }, - { 2, "Blue" }, - { 3, "Alpha" }, - }; - enum Cmp - { NotCmp, - Cmp - }; - - for(auto cmp : {NotCmp, Cmp}) - for(auto kk : kGatherComponets) - { - auto samplerOrComparisonSampler = cmp == Cmp ? "SamplerComparisonState s, " : samplerStateParam; - - auto componentIndex = kk.componentIndex; - auto componentName = kk.componentName; - - auto outputType = cc.outputType; - - const auto cmpName = cmp == Cmp ? "Cmp" : ""; - const auto cmpValueParam = cmp == Cmp ? "float compareValue, " : ""; - const auto cmpValueParamEnd = cmp == Cmp ? ", float compareValue" : ""; - const auto supportsGLSL = componentIndex == 0 || cmp == NotCmp; - - EMIT_LINE_DIRECTIVE(); - - if(supportsGLSL) - { - if(cmp == Cmp) - sb << "__target_intrinsic(glsl, \"textureGather($p, $2, $3)\")\n"; - else - sb << "__target_intrinsic(glsl, \"textureGather($p, $2, " << componentIndex << ")\")\n"; - } - if (base.coordCount == 2 && cmp == NotCmp) - { - // Gather only works on 2D in CUDA without comparison - // "It is based on the base type of DataType except when readMode is equal to cudaReadModeNormalizedFloat (see Texture Reference API), in which case it is always float4." 
- sb << "__target_intrinsic(cuda, \"tex2Dgather<$T0>($0, ($2).x, ($2).y, " << componentIndex << ")\")\n"; - } - if (isReadOnly) - sb << "[__readNone]\n"; - sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; - sb << "float" << base.coordCount + isArray << " location" << cmpValueParamEnd << ");\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - EMIT_LINE_DIRECTIVE(); - if(supportsGLSL) - { - if(cmp == Cmp) - sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, $4)\")\n"; - else - sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, " << componentIndex << ")\")\n"; - } - sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; - sb << "float" << base.coordCount + isArray << " location, "; - sb << cmpValueParam; - sb << "constexpr int" << base.coordCount << " offset);\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - EMIT_LINE_DIRECTIVE(); - sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; - sb << "float" << base.coordCount + isArray << " location, "; - sb << cmpValueParam; - sb << "constexpr int" << base.coordCount << " offset, "; - sb << "out uint status);\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - EMIT_LINE_DIRECTIVE(); - if(supportsGLSL) - { - if(cmp == Cmp) - sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, $3, ivec" << base.coordCount << "[]($4, $5, $6, $7))\")\n"; - else - sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, ivec" << base.coordCount << "[]($3, $4, $5, $6), " << componentIndex << ")\")\n"; - } - sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; - sb << "float" << base.coordCount + isArray << " location, "; - sb << cmpValueParam; - sb << "int" << base.coordCount << " offset1, "; - sb << "int" << base.coordCount << " offset2, "; - sb << "int" << base.coordCount << " offset3, "; - sb << "int" << base.coordCount << 
" offset4);\n"; - - if (isReadOnly) - sb << "[__readNone]\n"; - EMIT_LINE_DIRECTIVE(); - sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; - sb << "float" << base.coordCount + isArray << " location, "; - sb << cmpValueParam; - sb << "int" << base.coordCount << " offset1, "; - sb << "int" << base.coordCount << " offset2, "; - sb << "int" << base.coordCount << " offset3, "; - sb << "int" << base.coordCount << " offset4, "; - sb << "out uint status);\n"; - } - - EMIT_LINE_DIRECTIVE(); - sb << "\n}\n"; - } - } // TextureTypeInfo::emitTypeDecl -}; // struct TextureTypeInfo - for(auto& prefixInfo : kTexturePrefixes) for(auto& shapeInfo : kBaseTextureShapes) for(int isArray = 0; isArray < 2; ++isArray) @@ -3067,6 +2017,17 @@ bool __isSignedInt() return __isSignedInt_impl(__declVal<T>()); } +__generic<T> +__intrinsic_op($(kIROp_IsVector)) +bool __isVector_impl(T t); + +__generic<T> +[__unsafeForceInlineEarly] +bool __isVector() +{ + return __isVector_impl(__declVal<T>()); +} + // Provide implementations to public generic arithmetic interfaces for builtin types. 
${{{{ diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 23815d2e9..c195428e3 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -5073,10 +5073,9 @@ WaveMask WaveGetActiveMask() __intrinsic_asm "WaveActiveBallot(true).x"; case spirv: let _true = true; - let _scope = 3; // subgroup return (spirv_asm { - OpGroupNonUniformBallot $$uint4 result $_scope $_true + OpGroupNonUniformBallot $$uint4 result Subgroup $_true }).x; default: return __WaveGetActiveMask(); @@ -5097,10 +5096,9 @@ bool WaveMaskIsFirstLane(WaveMask mask) case hlsl: __intrinsic_asm "WaveIsFirstLane()"; case spirv: - let _scope = 3u; // subgroup return spirv_asm { - OpGroupNonUniformElect $$bool result $_scope + OpGroupNonUniformElect $$bool result Subgroup }; default: return false; @@ -5121,10 +5119,9 @@ bool WaveMaskAllTrue(WaveMask mask, bool condition) case hlsl: __intrinsic_asm "WaveActiveAllTrue($1)"; case spirv: - let _scope = 3u; // subgroup return spirv_asm { - OpGroupNonUniformAll $$bool result $_scope $condition + OpGroupNonUniformAll $$bool result Subgroup $condition }; default: return false; @@ -5145,10 +5142,9 @@ bool WaveMaskAnyTrue(WaveMask mask, bool condition) case hlsl: __intrinsic_asm "WaveActiveAnyTrue($1)"; case spirv: - let _scope = 3u; // subgroup return spirv_asm { - OpGroupNonUniformAny $$bool result $_scope $condition + OpGroupNonUniformAny $$bool result Subgroup $condition }; default: return false; @@ -5169,10 +5165,9 @@ WaveMask WaveMaskBallot(WaveMask mask, bool condition) case hlsl: __intrinsic_asm "WaveActiveBallot($1)"; case spirv: - let _scope = 3u; // subgroup return (spirv_asm { - OpGroupNonUniformBallot $$uint4 result $_scope $condition + OpGroupNonUniformBallot $$uint4 result Subgroup $condition }).x; default: return 0; @@ -5302,8 +5297,8 @@ T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane) case cuda: __intrinsic_asm "__shfl_sync($0, $1, $2)"; case hlsl: __intrinsic_asm "WaveReadLaneAt($1, 
$2)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBroadcast $$T result $_scope $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane}; } } @@ -5319,8 +5314,8 @@ vector<T,N> WaveMaskBroadcastLaneAt(WaveMask mask, vector<T,N> value, constexpr case cuda: __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)"; case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result $_scope $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $ulane}; } } __generic<T : __BuiltinType, let N : int, let M : int> @@ -5342,8 +5337,8 @@ T WaveMaskReadLaneAt(WaveMask mask, T value, int lane) case cuda: __intrinsic_asm "__shfl_sync($0, $1, $2)"; case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformShuffle $$T result $_scope $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane}; } } __generic<T : __BuiltinType, let N : int> @@ -5358,8 +5353,8 @@ vector<T,N> WaveMaskReadLaneAt(WaveMask mask, vector<T,N> value, int lane) case cuda: __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)"; case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result $_scope $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane}; } } __generic<T : __BuiltinType, let N : int, let M : int> @@ -5422,8 +5417,7 @@ T WaveMaskBitAnd(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveAnd($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm 
{OpGroupNonUniformBitwiseAnd $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseAnd $$T result Subgroup 0 $expr}; } } @@ -5439,8 +5433,7 @@ vector<T,N> WaveMaskBitAnd(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveAndMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBitwiseAnd $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup 0 $expr}; } } __generic<T : __BuiltinIntegerType, let N : int, let M : int> @@ -5460,8 +5453,7 @@ T WaveMaskBitOr(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveOr($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitOr($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBitwiseOr $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseOr $$T result Subgroup 0 $expr}; } } __generic<T : __BuiltinIntegerType, let N : int> @@ -5476,8 +5468,7 @@ vector<T,N> WaveMaskBitOr(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveOrMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitOr($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBitwiseOr $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup 0 $expr}; } } __generic<T : __BuiltinIntegerType, let N : int, let M : int> @@ -5497,8 +5488,7 @@ T WaveMaskBitXor(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveXor($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitXor($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBitwiseXor $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseXor $$T result Subgroup 0 $expr}; } } __generic<T : __BuiltinIntegerType, let N : int> @@ -5513,8 +5503,7 @@ vector<T,N> WaveMaskBitXor(WaveMask mask, vector<T,N> expr) case cuda: 
__intrinsic_asm "_waveXorMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveBitXor($1)"; case spirv: - let _scope = 3u; // subgroup - return spirv_asm {OpGroupNonUniformBitwiseXor $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup 0 $expr}; } } __generic<T : __BuiltinIntegerType, let N : int, let M : int> @@ -5534,13 +5523,12 @@ T WaveMaskMax(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveMax($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveMax($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMax $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMax $$T result Subgroup 0 $expr}; else if (__isSignedInt<T>()) - return spirv_asm {OpGroupNonUniformSMax $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformSMax $$T result Subgroup 0 $expr}; else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformUMax $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformUMax $$T result Subgroup 0 $expr}; } } __generic<T : __BuiltinArithmeticType, let N : int> @@ -5555,13 +5543,12 @@ vector<T,N> WaveMaskMax(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveMaxMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveMax($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMax $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMax $$vector<T,N> result Subgroup 0 $expr}; else if (__isSignedInt<T>()) - return spirv_asm {OpGroupNonUniformSMax $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformSMax $$vector<T,N> result Subgroup 0 $expr}; else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformUMax $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformUMax $$vector<T,N> result Subgroup 0 $expr}; } } @@ -5582,13 +5569,12 @@ T WaveMaskMin(WaveMask mask, T 
expr) case cuda: __intrinsic_asm "_waveMin($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveMin($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMin $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMin $$T result Subgroup 0 $expr}; else if (__isSignedInt<T>()) - return spirv_asm {OpGroupNonUniformSMin $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformSMin $$T result Subgroup 0 $expr}; else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformUMin $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformUMin $$T result Subgroup 0 $expr}; } } @@ -5604,13 +5590,12 @@ vector<T,N> WaveMaskMin(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveMinMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveMin($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMin $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMin $$vector<T,N> result Subgroup 0 $expr}; else if (__isSignedInt<T>()) - return spirv_asm {OpGroupNonUniformSMin $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformSMin $$vector<T,N> result Subgroup 0 $expr}; else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformUMin $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformUMin $$vector<T,N> result Subgroup 0 $expr}; } } @@ -5631,21 +5616,20 @@ T WaveMaskProduct(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveProduct($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveProduct($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMul $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMul $$T result Subgroup 0 $expr}; else if (__isSignedInt<T>()) { return spirv_asm { // TODO: use the correct integer width OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIMul $$T %mulResult $_scope 
0 %uvalue; + OpGroupNonUniformIMul $$T %mulResult Subgroup 0 %uvalue; OpBitcast $$T result %mulResult }; } else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformIMul $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformIMul $$T result Subgroup 0 $expr}; } } @@ -5661,21 +5645,20 @@ vector<T,N> WaveMaskProduct(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveProductMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveProduct($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFMul $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFMul $$vector<T,N> result Subgroup 0 $expr}; else if (__isSignedInt<T>()) { return spirv_asm { // TODO: use the correct integer width OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIMul $$vector<uint,N> %mulResult $_scope 0 %uvalue; + OpGroupNonUniformIMul $$vector<uint,N> %mulResult Subgroup 0 %uvalue; OpBitcast $$vector<T,N> result %mulResult }; } else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformIMul $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformIMul $$vector<T,N> result Subgroup 0 $expr}; } } @@ -5696,21 +5679,20 @@ T WaveMaskSum(WaveMask mask, T expr) case cuda: __intrinsic_asm "_waveSum($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveSum($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFAdd $$T result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFAdd $$T result Subgroup 0 $expr}; else if (__isSignedInt<T>()) { return spirv_asm { // TODO: use the correct integer width OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIAdd $$T %mulResult $_scope 0 %uvalue; + OpGroupNonUniformIAdd $$T %mulResult Subgroup 0 %uvalue; OpBitcast $$T result %mulResult }; } else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformIAdd $$T result $_scope 0 $expr}; + return spirv_asm 
{OpGroupNonUniformIAdd $$T result Subgroup 0 $expr}; } } __generic<T : __BuiltinArithmeticType, let N : int> @@ -5725,21 +5707,20 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr) case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveSum($1)"; case spirv: - let _scope = 3u; // subgroup if (__isFloat<T>()) - return spirv_asm {OpGroupNonUniformFAdd $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformFAdd $$vector<T,N> result Subgroup 0 $expr}; else if (__isSignedInt<T>()) { return spirv_asm { // TODO: use the correct integer width OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIAdd $$vector<uint,N> %mulResult $_scope 0 %uvalue; + OpGroupNonUniformIAdd $$vector<uint,N> %mulResult Subgroup 0 %uvalue; OpBitcast $$vector<T,N> result %mulResult }; } else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformIAdd $$vector<T,N> result $_scope 0 $expr}; + return spirv_asm {OpGroupNonUniformIAdd $$vector<T,N> result Subgroup 0 $expr}; } } __generic<T : __BuiltinArithmeticType, let N : int, let M : int> @@ -5763,10 +5744,9 @@ bool WaveMaskAllEqual(WaveMask mask, T value) case cuda: __intrinsic_asm "_waveAllEqual($0, $1)"; case spirv: - let _scope = 3u; // subgroup return spirv_asm { - OpGroupNonUniformAllEqual $$bool result $_scope $value + OpGroupNonUniformAllEqual $$bool result Subgroup $value }; default: return false; @@ -5788,10 +5768,9 @@ bool WaveMaskAllEqual(WaveMask mask, vector<T,N> value) case cuda: __intrinsic_asm "_waveAllEqualMultiple($0, $1)"; case spirv: - let _scope = 3u; // subgroup return spirv_asm { - OpGroupNonUniformAllEqual $$bool result $_scope $value + OpGroupNonUniformAllEqual $$bool result Subgroup $value }; default: return false; @@ -6526,10 +6505,9 @@ uint4 WaveActiveBallot(bool condition) case hlsl: __intrinsic_asm "WaveActiveBallot"; case spirv: - let _scope = 3u; // Subgroup return spirv_asm { - OpGroupNonUniformBallot $$uint4 result $_scope $condition 
+ OpGroupNonUniformBallot $$uint4 result Subgroup $condition }; default: return WaveMaskBallot(WaveGetActiveMask(), condition); @@ -6821,7 +6799,8 @@ T WaveBroadcastLaneAt(T value, constexpr int lane) case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane}; default: return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane); } @@ -6838,7 +6817,8 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane) case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $ulane}; default: return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane); } @@ -6865,7 +6845,8 @@ T WaveReadLaneAt(T value, int lane) case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane}; default: return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane); } @@ -6882,7 +6863,8 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane) case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane}; default: return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane); } @@ -6910,7 +6892,8 @@ T 
WaveShuffle(T value, int lane) case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane}; default: return WaveMaskShuffle(WaveGetActiveMask(), value, lane); } @@ -6927,7 +6910,8 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane) case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: - return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $lane}; + let ulane = uint(lane); + return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane}; default: return WaveMaskShuffle(WaveGetActiveMask(), value, lane); } diff --git a/source/slang/slang-ast-expr.h b/source/slang/slang-ast-expr.h index 6699426d5..9c604f6a0 100644 --- a/source/slang/slang-ast-expr.h +++ b/source/slang/slang-ast-expr.h @@ -657,6 +657,8 @@ public: SlangValue, SlangValueAddr, SlangType, + SampledType, // __sampledType(T), this becomes a 4 vector of the component type of T + TruncateMarker, // __truncate, an invented instruction which coerces to the result type by truncating the element count BuiltinVar, GLSL450Set, }; diff --git a/source/slang/slang-check-expr.cpp b/source/slang/slang-check-expr.cpp index 055364d5e..88d95f04e 100644 --- a/source/slang/slang-check-expr.cpp +++ b/source/slang/slang-check-expr.cpp @@ -3942,7 +3942,7 @@ namespace Slang // be able to deduce types for operands const auto opInfo = spirvInfo->opInfos.lookup(SpvOp(inst.opcode.knownValue)); - if(opInfo->numOperandTypes == 0 && inst.operands.getCount()) + if(opInfo && opInfo->numOperandTypes == 0 && inst.operands.getCount()) { failed = true; getSink()->diagnose(inst.opcode.token, Diagnostics::spirvInstructionWithTooManyOperands, inst.opcode.token, 0); @@ -3953,16 +3953,21 @@ namespace 
Slang for(Index operandIndex = 0; operandIndex < inst.operands.getCount(); ++operandIndex) { // Clamp to the end of the type info array, because the last one will be any variable operands + const auto invalidOperandKind = SPIRVCoreGrammarInfo::OperandKind{0xff}; const auto operandType - = opInfo->operandTypes[std::min(operandIndex, Index(opInfo->numOperandTypes)-1)]; + = opInfo.has_value() + ? opInfo->operandTypes[std::min(operandIndex, Index(opInfo->numOperandTypes)-1)] + : invalidOperandKind; const auto baseOperandType = spirvInfo->operandKindUnderneathIds.lookup(operandType).value_or(operandType); const auto needsIdWrapper = baseOperandType != operandType; const auto check = [&](const auto& go, auto& operand) -> void { - if(operand.flavor == SPIRVAsmOperand::SlangType) + if(operand.flavor == SPIRVAsmOperand::SlangType + || operand.flavor == SPIRVAsmOperand::SampledType) { - // This is a $$type operand, fill in the TypeExp member of the operand + // This is a $$type operand or __sampledType(T) + // operand, fill in its TypeExp member. 
TypeExp& typeExpr = operand.type; typeExpr.exp = operand.expr; typeExpr = CheckProperType(typeExpr); diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index 7c8bab1ad..76e96f2d5 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -255,6 +255,8 @@ DIAGNOSTIC(29109, Error, spirvOperandRange, "Literal ints must be in the range 0 DIAGNOSTIC(29110, Error, unknownTargetName, "unknown target name '$0'") +DIAGNOSTIC(29111, Error, spirvInvalidTruncate, "__truncate has been given a source smaller than its target") + // // 3xxxx - Semantic analysis // diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 846c4b5b4..07f1b2aee 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -1294,6 +1294,37 @@ struct SPIRVEmitContext } case kIROp_TextureType: { + // Some untyped constants from OpTypeImage + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage + + // indicates not a depth image + [[maybe_unused]] + const SpvWord notDepthImage = 0; + // indicates a depth image + [[maybe_unused]] + const SpvWord isDepthImage = 1; + // means no indication as to whether this is a depth or non-depth image + const SpvWord unknownDepthImage = 2; + + // indicates non-arrayed content + const SpvWord notArrayed = 0; + // indicates arrayed content + const SpvWord isArrayed = 1; + + // indicates single-sampled content + const SpvWord notMultisampled = 0; + // indicates multisampled content + const SpvWord isMultisampled = 1; + + // indicates this is only known at run time, not at compile time + const SpvWord sampledUnknown = 0; + // indicates an image compatible with sampling operations + const SpvWord sampledImage = 1; + // indicates an image compatible with read/write operations (a storage or subpass data image). 
+ const SpvWord readWriteImage = 2; + + // + const auto texTypeInst = as<IRTextureType>(inst); const auto sampledType = texTypeInst->getElementType(); SpvDim dim = SpvDim1D; // Silence uninitialized warnings from msvc... @@ -1318,16 +1349,78 @@ struct SPIRVEmitContext dim = SpvDimBuffer; break; } - bool arrayed = texTypeInst->isArray(); - SpvWord depth = 2; // No knowledge of if this is a depth image - bool ms = texTypeInst->isMultisample(); - // TODO: can we do better here? - SpvWord sampled = 0; // Only known at run time - // TODO: can we do better? + SpvWord arrayed = texTypeInst->isArray() ? isArrayed : notArrayed; + + // Vulkan spec 16.1: "The “Depth” operand of OpTypeImage is ignored." + SpvWord depth = unknownDepthImage; // No knowledge of if this is a depth image + SpvWord ms = texTypeInst->isMultisample() ? isMultisampled : notMultisampled; + + SpvWord sampled = sampledUnknown; + switch(texTypeInst->getAccess()) + { + case SlangResourceAccess::SLANG_RESOURCE_ACCESS_READ_WRITE: + case SlangResourceAccess::SLANG_RESOURCE_ACCESS_RASTER_ORDERED: + sampled = readWriteImage; + break; + case SlangResourceAccess::SLANG_RESOURCE_ACCESS_NONE: + case SlangResourceAccess::SLANG_RESOURCE_ACCESS_READ: + sampled = sampledImage; + break; + } + + // TODO: we need to do as _emitGLSLImageFormatModifier does, + // take a guess at the image format SpvImageFormat format = SpvImageFormatUnknown; + + // + // Capabilities, according to section 3.8 + // + // SPIR-V requires that the sampled/rw info on the image isn't unknown + SLANG_ASSERT(sampled == sampledImage || sampled == readWriteImage); + switch(dim) + { + case SpvDim1D: + requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampled1D : SpvCapabilityImage1D); + break; + case SpvDim2D: + // Also requires Shader or Kernel, but these are a given (?) 
+ if(sampled == readWriteImage && ms == isMultisampled && arrayed == isArrayed) + requireSPIRVCapability(SpvCapabilityImageMSArray); + break; + case SpvDim3D: + break; + case SpvDimCube: + // Requires shader also + if(sampled == readWriteImage && arrayed == isArrayed) + requireSPIRVCapability(SpvCapabilityImageCubeArray); + break; + case SpvDimRect: + requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampledRect : SpvCapabilityImageRect); + break; + case SpvDimBuffer: + requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampledBuffer : SpvCapabilityImageBuffer); + break; + case SpvDimSubpassData: + requireSPIRVCapability(SpvCapabilityInputAttachment); + break; + case SpvDimTileImageDataEXT: + SLANG_UNIMPLEMENTED_X("OpTypeImage Capabilities for SpvDimTileImageDataEXT"); + break; + } + if(format == SpvImageFormatUnknown && sampled == readWriteImage) + { + // TODO: It may not be necessary to have both of these + // depending on if we read or write + requireSPIRVCapability(SpvCapabilityStorageImageReadWithoutFormat); + requireSPIRVCapability(SpvCapabilityStorageImageWriteWithoutFormat); + } + + // + // The op itself + // return emitOpTypeImage( inst, - sampledType, + dropVector(sampledType), dim, SpvLiteralInteger::from32(depth), SpvLiteralInteger::from32(arrayed), @@ -1503,12 +1596,6 @@ struct SPIRVEmitContext varInst, SpvLiteralInteger::from32(int32_t(index)) ); - emitOpDecorateIndex( - getSection(SpvLogicalSectionID::Annotations), - nullptr, - varInst, - SpvLiteralInteger::from32(int32_t(space)) - ); break; case LayoutResourceKind::VaryingOutput: emitOpDecorateLocation( @@ -3829,7 +3916,6 @@ struct SPIRVEmitContext for(const auto spvInst : inst->getInsts()) { const bool isLast = spvInst == inst->getLastChild(); - const SpvOp opcode = SpvOp(spvInst->getOpcodeOperandWord()); const auto parentForOpCode = [this](SpvOp opcode, SpvInstParent* defaultParent) -> SpvInstParent*{ const auto info = m_grammarInfo->opInfos.lookup(opcode); @@ -3859,122 
+3945,242 @@ struct SPIRVEmitContext } }; - switch (opcode) - { - case SpvOpCapability: - requireSPIRVCapability((SpvCapability)getIntVal(spvInst->getOperand(1)->getOperand(0))); - continue; - case SpvOpExtension: - ensureExtensionDeclaration(as<IRStringLit>(spvInst->getOperand(1)->getOperand(0))->getStringSlice()); - continue; - default: - break; - } + const auto emitSpvAsmOperand = [&](IRSPIRVAsmOperand* operand){ + switch(operand->getOp()) + { + case kIROp_SPIRVAsmOperandEnum: + case kIROp_SPIRVAsmOperandLiteral: + { + const auto v = as<IRConstant>(operand->getValue()); + SLANG_ASSERT(v); + if(operand->getOperandCount() >= 2) - last = emitInstCustomOperandFunc( - parentForOpCode(opcode, parent), - // We want the "result instruction" to refer to the top level - // block which assumes its value, the others are free to refer - // to whatever, so just use the internal spv inst rep - // TODO: This is not correct, because the instruction which is - // assigned to result is not necessarily the last instruction - isLast ? 
as<IRInst>(inst) : spvInst, - opcode, - [&](){ - for(const auto operand : spvInst->getSPIRVOperands()) { - switch(operand->getOp()) - { - case kIROp_SPIRVAsmOperandEnum: - case kIROp_SPIRVAsmOperandLiteral: + const auto constantType = cast<IRType>(operand->getOperand(1)); + SpvInst* constant; + switch(v->getOp()) { - const auto v = as<IRConstant>(operand->getValue()); - SLANG_ASSERT(v); - if(operand->getOperandCount() >= 2) - { - const auto constantType = cast<IRType>(operand->getOperand(1)); - SpvInst* constant; - switch(v->getOp()) - { - case kIROp_IntLit: - { - // TODO: range checking - const auto i = cast<IRIntLit>(v)->getValue(); - constant = emitIntConstant(i, constantType); - break; - } - case kIROp_StringLit: - SLANG_UNIMPLEMENTED_X("String constants in SPIR-V emit"); - default: - SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); - } - emitOperand(constant); - } - else - { - switch(v->getOp()) - { - case kIROp_StringLit: - emitOperand(SpvLiteralBits::fromUnownedStringSlice(v->getStringSlice())); - break; - case kIROp_IntLit: - { - // TODO: range checking - const auto i = cast<IRIntLit>(v)->getValue(); - emitOperand(SpvLiteralInteger::from32(uint32_t(i))); - break; - } - default: - SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); - } - } - break; - } - case kIROp_SPIRVAsmOperandInst: - { - const auto i = operand->getValue(); - emitOperand(ensureInst(i)); - - break; - } - case kIROp_SPIRVAsmOperandResult: + case kIROp_IntLit: { - SLANG_ASSERT(isLast); - emitOperand(kResultID); + // TODO: range checking + const auto i = cast<IRIntLit>(v)->getValue(); + constant = emitIntConstant(i, constantType); break; } - case kIROp_SPIRVAsmOperandId: - { - const auto idName = cast<IRStringLit>(operand->getValue())->getStringSlice(); - SpvWord id; - if(!idMap.tryGetValue(idName, id)) - { - id = freshID(); - idMap.set(idName, id); - } - emitOperand(id); - break; + case kIROp_StringLit: + SLANG_UNIMPLEMENTED_X("String constants in SPIR-V emit"); + default: + 
SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); } - case kIROp_SPIRVAsmOperandBuiltinVar: + emitOperand(constant); + } + else + { + switch(v->getOp()) { - const auto kind = (SpvBuiltIn)(getIntVal(operand->getOperand(0))); - IRBuilder builder(operand); - builder.setInsertBefore(operand); - auto varInst = getBuiltinGlobalVar(builder.getPtrType(kIROp_PtrType, operand->getDataType(), SpvStorageClassInput), kind); - emitOperand(varInst); + case kIROp_StringLit: + emitOperand(SpvLiteralBits::fromUnownedStringSlice(v->getStringSlice())); break; - } - case kIROp_SPIRVAsmOperandGLSL450Set: + case kIROp_IntLit: { - emitOperand(getGLSL450ExtInst()); + // TODO: range checking + const auto i = cast<IRIntLit>(v)->getValue(); + emitOperand(SpvLiteralInteger::from32(uint32_t(i))); break; } default: SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); } } + break; } - ); + case kIROp_SPIRVAsmOperandInst: + { + const auto i = operand->getValue(); + emitOperand(ensureInst(i)); + + break; + } + case kIROp_SPIRVAsmOperandResult: + { + SLANG_ASSERT(isLast); + emitOperand(kResultID); + break; + } + case kIROp_SPIRVAsmOperandId: + { + const auto idName = cast<IRStringLit>(operand->getValue())->getStringSlice(); + SpvWord id; + if(!idMap.tryGetValue(idName, id)) + { + id = freshID(); + idMap.set(idName, id); + } + emitOperand(id); + break; + } + case kIROp_SPIRVAsmOperandSampledType: + { + // Make a 4 vector of the component type + IRBuilder builder(m_irModule); + const auto elementType = cast<IRType>(operand->getValue()); + const auto sampledType = builder.getVectorType(dropVector(elementType), 4); + emitOperand(ensureInst(sampledType)); + break; + } + case kIROp_SPIRVAsmOperandBuiltinVar: + { + const auto kind = (SpvBuiltIn)(getIntVal(operand->getOperand(0))); + IRBuilder builder(operand); + builder.setInsertBefore(operand); + auto varInst = getBuiltinGlobalVar(builder.getPtrType(kIROp_PtrType, operand->getDataType(), SpvStorageClassInput), kind); + emitOperand(varInst); + break; + 
} + case kIROp_SPIRVAsmOperandGLSL450Set: + { + emitOperand(getGLSL450ExtInst()); + break; + } + default: + SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); + } + }; + + if(spvInst->getOpcodeOperand()->getOp() == kIROp_SPIRVAsmOperandTruncate) + { + const auto getSlangType = [&](IRSPIRVAsmOperand* operand) -> IRType*{ + switch(operand->getOp()) + { + case kIROp_SPIRVAsmOperandInst: + return cast<IRType>(operand->getValue()); + case kIROp_SPIRVAsmOperandSampledType: + { + // Make a 4 vector of the component type + IRBuilder builder(m_irModule); + const auto elementType = cast<IRType>(operand->getValue()); + return builder.getVectorType(dropVector(elementType), 4); + } + case kIROp_SPIRVAsmOperandEnum: + case kIROp_SPIRVAsmOperandLiteral: + case kIROp_SPIRVAsmOperandResult: + case kIROp_SPIRVAsmOperandId: + SLANG_UNEXPECTED("truncate should have been given slang types"); + default: + SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm"); + } + }; + + SLANG_ASSERT(spvInst->getSPIRVOperands().getCount() == 4); + const auto toType = getSlangType(spvInst->getSPIRVOperands()[0]); + const auto toIdOperand = spvInst->getSPIRVOperands()[1]; + const auto fromType = getSlangType(spvInst->getSPIRVOperands()[2]); + const auto fromIdOperand = spvInst->getSPIRVOperands()[3]; + + // The component types must be the same + SLANG_ASSERT(isTypeEqual(dropVector(toType), dropVector(fromType))); + + // If we don't need truncation, but a different result ID is + // expected, then just unify them in the idMap + if(isTypeEqual(toType, fromType)) + { + // TODO: if this is the last inst, we should just remove it + // and rewrite the penultimate one + last = emitInstCustomOperandFunc( + parent, + isLast ? 
as<IRInst>(inst) : spvInst, + SpvOpCopyObject, + [&](){ + emitOperand(toType); + emitSpvAsmOperand(toIdOperand); + emitSpvAsmOperand(fromIdOperand); + } + ); + } + // Otherwise, if we are truncating to a scalar, extract the first element + else if(!as<IRVectorType>(toType)) + { + last = emitInstCustomOperandFunc( + parent, + isLast ? as<IRInst>(inst) : spvInst, + SpvOpCompositeExtract, + [&](){ + emitOperand(toType); + emitSpvAsmOperand(toIdOperand); + emitSpvAsmOperand(fromIdOperand); + emitOperand(SpvLiteralInteger::from32(0)); + } + ); + } + // Otherwise, if we are truncating to a 1-vector from a scalar + else if(as<IRVectorType>(toType) && !as<IRVectorType>(fromType)) + { + last = emitInstCustomOperandFunc( + parent, + isLast ? as<IRInst>(inst) : spvInst, + SpvOpCompositeConstruct, + [&](){ + emitOperand(toType); + emitSpvAsmOperand(toIdOperand); + emitSpvAsmOperand(fromIdOperand); + } + ); + } + // Otherwise, we are truncating a vector to a smaller vector + else + { + const auto toVector = cast<IRVectorType>(toType); + const auto toVectorSize = getIntVal(toVector->getElementCount()); + const auto fromVector = cast<IRVectorType>(fromType); + const auto fromVectorSize = getIntVal(fromVector->getElementCount()); + if(toVectorSize > fromVectorSize) + m_sink->diagnose(inst, Diagnostics::spirvInvalidTruncate); + last = emitInstCustomOperandFunc( + parent, + isLast ? 
as<IRInst>(inst) : spvInst, + SpvOpVectorShuffle, + [&](){ + emitOperand(toType); + emitSpvAsmOperand(toIdOperand); + emitSpvAsmOperand(fromIdOperand); + emitOperand(emitOpUndef(parent, nullptr, fromVector)); + for(Int32 i = 0; i < toVectorSize; ++i) + emitOperand(SpvLiteralInteger::from32(i)); + } + ); + } + } + else + { + const SpvOp opcode = SpvOp(spvInst->getOpcodeOperandWord()); + + switch (opcode) + { + case SpvOpCapability: + requireSPIRVCapability((SpvCapability)getIntVal(spvInst->getOperand(1)->getOperand(0))); + continue; + case SpvOpExtension: + ensureExtensionDeclaration(as<IRStringLit>(spvInst->getOperand(1)->getOperand(0))->getStringSlice()); + continue; + default: + break; + } + + last = emitInstCustomOperandFunc( + parentForOpCode(opcode, parent), + // We want the "result instruction" to refer to the top level + // block which assumes its value, the others are free to refer + // to whatever, so just use the internal spv inst rep + // TODO: This is not correct, because the instruction which is + // assigned to result is not necessarily the last instruction + isLast ? 
as<IRInst>(inst) : spvInst, + opcode, + [&](){ + for(const auto operand : spvInst->getSPIRVOperands()) + emitSpvAsmOperand(operand); + } + ); + } } for(const auto& [name, id] : idMap) diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 8dc0d2983..68fd81cee 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -42,6 +42,7 @@ #include "slang-ir-lower-size-of.h" #include "slang-ir-lower-reinterpret.h" #include "slang-ir-loop-unroll.h" +#include "slang-ir-legalize-vector-types.h" #include "slang-ir-metadata.h" #include "slang-ir-optix-entry-point-uniforms.h" #include "slang-ir-restructure.h" @@ -570,6 +571,12 @@ Result linkAndOptimizeIR( sink); } + if(isKhronosTarget(targetRequest)) + { + // SPIR-V doesn't support 1-vectors + legalizeVectorTypes(irModule, sink); + } + // Once specialization and type legalization have been performed, // we should perform some of our basic optimization steps again, // to see if we can clean up any temporaries created by legalization. 
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index b248012a0..f48801162 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -983,6 +983,7 @@ INST(IsBool, IsBool, 1, 0) INST(IsFloat, IsFloat, 1, 0) INST(IsUnsignedInt, IsUnsignedInt, 1, 0) INST(IsSignedInt, IsSignedInt, 1, 0) +INST(IsVector, IsVector, 1, 0) INST(ForwardDifferentiate, ForwardDifferentiate, 1, 0) @@ -1076,7 +1077,9 @@ INST(SPIRVAsmInst, SPIRVAsmInst, 1, 0) // A literal string or 32-bit integer to be passed as operands INST(SPIRVAsmOperandLiteral, SPIRVAsmOperandLiteral, 1, HOISTABLE) // A reference to a slang IRInst, either a value or a type - INST(SPIRVAsmOperandInst, SPIRVAsmOperandInst, 1, HOISTABLE) + // This isn't hoistable, as we sometimes need to change the used value and + // instructions around the specific asm block + INST(SPIRVAsmOperandInst, SPIRVAsmOperandInst, 1, 0) // A named enumerator, the value is stored as a constant operand // It may have a second operand, which if present is a type with which to // construct a constant id to pass, instead of a literal constant @@ -1091,7 +1094,13 @@ INST(SPIRVAsmInst, SPIRVAsmInst, 1, 0) // A special instruction which marks the place to insert the generated // result operand INST(SPIRVAsmOperandResult, SPIRVAsmOperandResult, 0, HOISTABLE) -INST_RANGE(SPIRVAsmOperand, SPIRVAsmOperandLiteral, SPIRVAsmOperandResult) + // A special instruction which represents a type directed truncation + // operation where extra components are dropped + INST(SPIRVAsmOperandTruncate, __truncate, 0, HOISTABLE) + // A type function which returns the result type of sampling an image of + // this component type + INST(SPIRVAsmOperandSampledType, __sampledType, 1, HOISTABLE) +INST_RANGE(SPIRVAsmOperand, SPIRVAsmOperandLiteral, SPIRVAsmOperandSampledType) #undef PARENT diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 9d2f44355..bfcca5b02 100644 --- 
a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -2373,6 +2373,8 @@ struct IRTryCall : IRTerminatorInst struct IRSwizzle : IRInst { + IR_LEAF_ISA(swizzle); + IRUse base; IRInst* getBase() { return base.get(); } @@ -2388,6 +2390,8 @@ struct IRSwizzle : IRInst struct IRSwizzleSet : IRInst { + IR_LEAF_ISA(swizzleSet); + IRUse base; IRUse source; @@ -2594,6 +2598,16 @@ struct IRGetTargetTupleElement : IRInst IRInst* getElementIndex() { return getOperand(1); } }; +struct IRMakeVector : IRInst +{ + IR_LEAF_ISA(MakeVector) +}; + +struct IRMakeVectorFromScalar : IRInst +{ + IR_LEAF_ISA(MakeVectorFromScalar) +}; + // An Instruction that creates a differential pair value from a // primal and differential. @@ -2893,6 +2907,8 @@ struct IRDebugLine : IRInst IRInst* getColEnd() { return getOperand(4); } }; +struct IRSPIRVAsm; + struct IRSPIRVAsmOperand : IRInst { IR_PARENT_ISA(SPIRVAsmOperand); @@ -2902,6 +2918,17 @@ struct IRSPIRVAsmOperand : IRInst return nullptr; return getOperand(0); } + IRSPIRVAsm* getAsmBlock() + { + const auto ret = as<IRSPIRVAsm>(getParent()); + SLANG_ASSERT(ret); + return ret; + } +}; + +struct IRSPIRVAsmOperandInst : IRSPIRVAsmOperand +{ + IR_LEAF_ISA(SPIRVAsmOperandInst); }; struct IRSPIRVAsmInst : IRInst @@ -2911,15 +2938,23 @@ struct IRSPIRVAsmInst : IRInst IRSPIRVAsmOperand* getOpcodeOperand() { const auto opcodeOperand = cast<IRSPIRVAsmOperand>(getOperand(0)); - SLANG_ASSERT(opcodeOperand->getOp() == kIROp_SPIRVAsmOperandEnum); + // This must be either: + // - An enum, such as 'OpNop' + // - The __truncate pseudo-instruction + // - A literal, like 107 (OpImageQuerySamples) + SLANG_ASSERT(opcodeOperand->getOp() == kIROp_SPIRVAsmOperandEnum + || opcodeOperand->getOp() == kIROp_SPIRVAsmOperandTruncate + || opcodeOperand->getOp() == kIROp_SPIRVAsmOperandLiteral); return opcodeOperand; } SpvWord getOpcodeOperandWord() { const auto o = getOpcodeOperand(); - SLANG_ASSERT(o->getOp() != kIROp_SPIRVAsmOperandResult); const auto v = 
o->getValue(); + // It's not valid to call this on an operand which doesn't have a value + // (such as __truncate) + SLANG_ASSERT(v); const auto i = cast<IRIntLit>(v); return SpvWord(i->getValue()); } @@ -3939,6 +3974,8 @@ public: IRSPIRVAsmOperand* emitSPIRVAsmOperandEnum(IRInst* inst, IRType* constantType); IRSPIRVAsmOperand* emitSPIRVAsmOperandBuiltinVar(IRInst* type, IRInst* builtinKind); IRSPIRVAsmOperand* emitSPIRVAsmOperandGLSL450Set(); + IRSPIRVAsmOperand* emitSPIRVAsmOperandSampledType(IRType* elementType); + IRSPIRVAsmOperand* emitSPIRVAsmOperandTruncate(); IRSPIRVAsmInst* emitSPIRVAsmInst(IRInst* opcode, List<IRInst*> operands); IRSPIRVAsm* emitSPIRVAsm(IRType* type); IRInst* emitGenericAsm(UnownedStringSlice asmText); diff --git a/source/slang/slang-ir-legalize-vector-types.cpp b/source/slang/slang-ir-legalize-vector-types.cpp new file mode 100644 index 000000000..9b99f2a98 --- /dev/null +++ b/source/slang/slang-ir-legalize-vector-types.cpp @@ -0,0 +1,193 @@ +#include "slang-ir-legalize-vector-types.h" +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-util.h" + +namespace Slang +{ + struct VectorTypeLoweringContext + { + IRModule* module; + DiagnosticSink* sink; + + InstWorkList workList; + InstHashSet workListSet; + + Dictionary<IRInst*, IRInst*> replacements; + + VectorTypeLoweringContext(IRModule* module) + :module(module), workList(module), workListSet(module) + {} + + void addToWorkList(IRInst* inst) + { + for (auto ii = inst->getParent(); ii; ii = ii->getParent()) + { + if (as<IRGeneric>(ii)) + return; + } + + if (workListSet.contains(inst)) + return; + + workList.add(inst); + workListSet.add(inst); + } + + bool is1Vector(IRType* t) + { + const auto lenLit = composeGetters<IRIntLit>(t, &IRVectorType::getElementCount); + return lenLit ? 
getIntVal(lenLit) == 1 : false; + }; + + bool has1VectorType(IRInst* i) + { + return is1Vector(i->getDataType()); + } + + bool has1VectorPtrType(IRInst* i) + { + const auto ptr = as<IRPtrTypeBase>(i->getDataType()); + return ptr && is1Vector(ptr->getValueType()); + } + + // If necessary, this returns a new instruction which operates on the + // single component of a 1-vector. + // If no new instruction was created, then the old one is returned + // unmodified, when we replace the 1-vector type globally, only then + // will the return type of that instruction be updated; thus you + // shouldn't rely on this function returning an instruction with a non + // 1-vector return type (even if we didn't have the deferred + // replacement this is not true, as it'll only eliminate at most one + // level of 1-vectornes, and nested vectors exist) + IRInst* getReplacement(IRInst* inst) + { + IRInst* replacement = nullptr; + if(replacements.tryGetValue(inst, replacement)) + return replacement; + + IRBuilder builder(module); + builder.setInsertBefore(inst); + replacement = instMatch<IRInst*>(inst, nullptr, + // The following match instructions which take a 1-vector as an + // operand and are sensitive to the fact that it's a vector. + // Likewise for pointers. + [&](IRGetElement* getElement){ + const auto base = getElement->getBase(); + return has1VectorType(base) ? 
getReplacement(base) : nullptr; + }, + [&](IRSwizzle* swizzle) -> IRInst*{ + const auto swizzled = swizzle->getBase(); + + // Is this a swizzle of a 1-vector + if(has1VectorType(swizzled)) + { + // If this is a unary swizzle, just return the element + // inside + const auto scalar = getReplacement(swizzled); + if(swizzle->getElementCount() == 1) + return scalar; + // Otherwise, create a broadcast of this scalar + else + return builder.emitMakeVectorFromScalar( + swizzle->getFullType(), + scalar); + } + return nullptr; + }, + [&](IRGetElementPtr* gep){ + const auto base = gep->getBase(); + return has1VectorPtrType(base) ? getReplacement(base) : nullptr; + }, + [&](IRSwizzledStore* swizzledStore){ + const auto base = swizzledStore->getDest(); + return has1VectorPtrType(base) + ? builder.emitStore(getReplacement(base), swizzledStore->getSource()) + : nullptr; + }, + // The following should match any instruction which can construct, + // specifically, a 1-vector. For example 'MakeVector' + // + // Instruction like, for example, arithmetic instructions don't + // need to be handled here, and they'll be fixed by the global + // 1-vector to scalar type replacement. + [&](IRMakeVectorFromScalar* makeVec){ + return has1VectorType(makeVec) + ? getReplacement(makeVec->getOperand(0)) + : nullptr; + }, + [&](IRMakeVector* makeVec){ + return has1VectorType(makeVec) + ? getReplacement(makeVec->getOperand(0)) + : nullptr; + }, + // Otherwise if this is a 1-vector type itself, replace it with + // the scalar version. + [&](IRVectorType* vecTy){ + return is1Vector(vecTy) + ? getReplacement(vecTy->getElementType()) + : nullptr; + }); + + // Sadly it's not really possible to catch missing cases here, as + // there are heaps of instructions which don't do anything special + // with vectors, but can take or return vector types, for example + // arithmetic, IRGetElement, IRGetField etc... 
+ + // If we did get a replacement, add that to our mapping and return + // it, otherwise return the original (to maybe be updated later) + if(replacement) + { + replacements.set(inst, replacement); + addToWorkList(replacement); + } + + return replacement ? replacement : inst; + } + + void processModule() + { + addToWorkList(module->getModuleInst()); + + while (workList.getCount() != 0) + { + IRInst* inst = workList.getLast(); + + workList.removeLast(); + workListSet.remove(inst); + + // Run this inst through the replacer + getReplacement(inst); + + for (auto child = inst->getLastChild(); child; child = child->getPrevInst()) + { + addToWorkList(child); + } + } + + // Apply all replacements + // + // It's important to defer this as if we were updating things + // on-the-fly we would be losing information about what was + // actually a 1-vector or not. The alternative would be cloning + // every function with a 1-vector type as we process it, and + // cleaning up at the end. This involves less copying, but is + // necessarily a little less type-safe. 
+ for (const auto& [old, replacement] : replacements) + { + if(old != replacement) + { + old->replaceUsesWith(replacement); + old->removeAndDeallocate(); + } + } + } + }; + + void legalizeVectorTypes(IRModule* module, DiagnosticSink* sink) + { + VectorTypeLoweringContext context(module); + context.sink = sink; + context.processModule(); + } +} diff --git a/source/slang/slang-ir-legalize-vector-types.h b/source/slang/slang-ir-legalize-vector-types.h new file mode 100644 index 000000000..4428a10b0 --- /dev/null +++ b/source/slang/slang-ir-legalize-vector-types.h @@ -0,0 +1,13 @@ +#pragma once + +namespace Slang +{ + struct IRModule; + class DiagnosticSink; + + // - [ ] Lower 0 length vectors to unit + // - [x] Lower 1 length vectors to scalar + // - [ ] Lower too long vectors to tuples + void legalizeVectorTypes(IRModule* module, DiagnosticSink* sink); + +} diff --git a/source/slang/slang-ir-peephole.cpp b/source/slang/slang-ir-peephole.cpp index 34ccdf924..f6e8a3458 100644 --- a/source/slang/slang-ir-peephole.cpp +++ b/source/slang/slang-ir-peephole.cpp @@ -922,6 +922,7 @@ struct PeepholeContext : InstPassBase case kIROp_IsUnsignedInt: case kIROp_IsSignedInt: case kIROp_IsBool: + case kIROp_IsVector: { auto type = inst->getOperand(0)->getDataType(); if (auto vectorType = as<IRVectorType>(type)) @@ -950,6 +951,9 @@ struct PeepholeContext : InstPassBase case kIROp_IsSignedInt: result = isIntegralType(type) && getIntTypeInfo(type).isSigned; break; + case kIROp_IsVector: + result = as<IRVectorType>(type); + break; } inst->replaceUsesWith(builder.getBoolValue(result)); maybeRemoveOldInst(inst); diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index a4b33324b..63be9d19d 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -158,36 +158,42 @@ struct SPIRVLegalizationContext : public SourceEmitterBase auto user = use->getUser(); IRBuilder builder(user); 
builder.setInsertBefore(user); - switch (user->getOp()) + if(as<IRGetElement>(user) || as<IRFieldExtract>(user)) { - case kIROp_GetElement: - case kIROp_FieldExtract: - { - auto basePtrType = as<IRPtrTypeBase>(addr->getDataType()); - IRType* ptrType = nullptr; - if (basePtrType->hasAddressSpace()) - ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType(), basePtrType->getAddressSpace()); - else - ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType()); - IRInst* subAddr = nullptr; - if (user->getOp() == kIROp_GetElement) - subAddr = builder.emitElementAddress(ptrType, addr, as<IRGetElement>(user)->getIndex()); - else - subAddr = builder.emitFieldAddress(ptrType, addr, as<IRFieldExtract>(user)->getField()); - - for (auto u = user->firstUse; u; u = u->nextUse) - { - workList.add(WorkItem{ subAddr, u }); - } - instsToRemove.add(user); - break; - } - default: + auto basePtrType = as<IRPtrTypeBase>(addr->getDataType()); + IRType* ptrType = nullptr; + if (basePtrType->hasAddressSpace()) + ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType(), basePtrType->getAddressSpace()); + else + ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType()); + IRInst* subAddr = nullptr; + if (user->getOp() == kIROp_GetElement) + subAddr = builder.emitElementAddress(ptrType, addr, as<IRGetElement>(user)->getIndex()); + else + subAddr = builder.emitFieldAddress(ptrType, addr, as<IRFieldExtract>(user)->getField()); + + for (auto u = user->firstUse; u; u = u->nextUse) { - auto val = builder.emitLoad(addr); - builder.replaceOperand(use, val); - break; + workList.add(WorkItem{ subAddr, u }); } + instsToRemove.add(user); + } + else if(const auto spirvAsmOperand = as<IRSPIRVAsmOperandInst>(user)) + { + // If this is being used in an asm block, insert the load to + // just prior to the block. 
+ const auto asmBlock = spirvAsmOperand->getAsmBlock(); + builder.setInsertBefore(asmBlock); + auto loadedValue = builder.emitLoad(addrInst); + builder.setInsertBefore(spirvAsmOperand); + auto loadedValueOperand = builder.emitSPIRVAsmOperandInst(loadedValue); + spirvAsmOperand->replaceUsesWith(loadedValueOperand); + spirvAsmOperand->removeAndDeallocate(); + } + else + { + auto val = builder.emitLoad(addr); + builder.replaceOperand(use, val); } } @@ -212,9 +218,6 @@ struct SPIRVLegalizationContext : public SourceEmitterBase innerType = arrayType->getElementType(); } - if (as<IRResourceTypeBase>(innerType)) - return; - SpvStorageClass storageClass = SpvStorageClassPrivate; // Figure out storage class based on var layout. if (auto layout = getVarLayout(inst)) @@ -231,6 +234,15 @@ struct SPIRVLegalizationContext : public SourceEmitterBase } } + // Textures and Samplers can't be in Uniform for Vulkan, if they are + // placed here then put them in UniformConstant instead + if (storageClass == SpvStorageClassUniform + && (as<IRTextureTypeBase>(inst->getDataType()) + || as<IRSamplerStateTypeBase>(inst->getDataType()))) + { + storageClass = SpvStorageClassUniformConstant; + } + // Strip any HLSL wrappers IRBuilder builder(m_sharedContext->m_irModule); bool needLoad = true; @@ -875,6 +887,19 @@ struct SPIRVLegalizationContext : public SourceEmitterBase } } + void processConstructor(IRInst* inst) + { + // If all of the operands to this instruction are global, we can hoist + // this constructor to be a global too. This is important to make sure + // that vectors made of constant components end up being emitted as + // constant vectors (using OpConstantComposite). 
+ UIndex opIndex = 0; + for (auto operand = inst->getOperands(); opIndex < inst->getOperandCount(); operand++, opIndex++) + if(operand->get()->getParent() != m_module->getModuleInst()) + return; + inst->insertAtEnd(m_module->getModuleInst()); + } + void processModule() { // Process global params before anything else, so we don't generate inefficient @@ -936,6 +961,25 @@ struct SPIRVLegalizationContext : public SourceEmitterBase case kIROp_Switch: processSwitch(as<IRSwitch>(inst)); break; + + case kIROp_MakeVectorFromScalar: + case kIROp_MakeUInt64: + case kIROp_MakeVector: + case kIROp_MakeMatrix: + case kIROp_MakeMatrixFromScalar: + case kIROp_MatrixReshape: + case kIROp_MakeArray: + case kIROp_MakeArrayFromElement: + case kIROp_MakeStruct: + case kIROp_MakeTuple: + case kIROp_MakeTargetTuple: + case kIROp_MakeResultValue: + case kIROp_MakeResultError: + case kIROp_MakeOptionalValue: + case kIROp_MakeOptionalNone: + processConstructor(inst); + break; + default: for (auto child = inst->getLastChild(); child; child = child->getPrevInst()) { diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index d3cfea6e9..6777c0b3a 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -5772,6 +5772,31 @@ namespace Slang return i; } + IRSPIRVAsmOperand* IRBuilder::emitSPIRVAsmOperandSampledType(IRType* elementType) + { + SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent())); + const auto i = createInst<IRSPIRVAsmOperand>( + this, + kIROp_SPIRVAsmOperandSampledType, + getTypeType(), + elementType + ); + addInst(i); + return i; + } + + IRSPIRVAsmOperand* IRBuilder::emitSPIRVAsmOperandTruncate() + { + SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent())); + const auto i = createInst<IRSPIRVAsmOperand>( + this, + kIROp_SPIRVAsmOperandTruncate, + getVoidType() + ); + addInst(i); + return i; + } + IRSPIRVAsmInst* IRBuilder::emitSPIRVAsmInst(IRInst* opcode, List<IRInst*> operands) { SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent())); @@ -6631,6 
+6656,14 @@ namespace Slang case kIROp_SPIRVAsmOperandResult: dump(context, "result"); return; + case kIROp_SPIRVAsmOperandTruncate: + dump(context, "__truncate"); + return; + case kIROp_SPIRVAsmOperandSampledType: + dump(context, "__sampledType("); + dumpInstExpr(context, inst->getOperand(0)); + dump(context, ")"); + return; } dump(context, opInfo.name); diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 413410880..54ef87009 100644 --- a/source/slang/slang-ir.h +++ b/source/slang/slang-ir.h @@ -2483,7 +2483,6 @@ template<typename R, typename T> static T thisArg(R (T::*&&())()); } -#if __cplusplus >= 201703L // A tool to "pattern match" an instruction against multiple cases // Use like: // @@ -2499,7 +2498,6 @@ static T thisArg(R (T::*&&())()); template<typename R, typename F, typename... Fs> R instMatch(IRInst* i, R def, F f, Fs... fs) { - static_assert(__cplusplus >= 201703L, "Wait until we're on c++17 to use instMatch"); // Recursive case using P = decltype(detail::argType(std::function{std::declval<F>()})); if(auto s = as<P>(i)) @@ -2531,7 +2529,6 @@ R instMatch(IRInst*, R def) template<typename F, typename... Fs> void instMatch_(IRInst* i, F f, Fs... 
fs) { - static_assert(__cplusplus >= 201703L, "Wait until we're on c++17 to use instMatch_"); // Recursive case using P = decltype(detail::argType(std::function{std::declval<F>()})); if(auto s = as<P>(i)) @@ -2546,7 +2543,6 @@ void instMatch_(IRInst*) { // Base case with no eliminators } -#endif // A tool to compose a bunch of downcasts and accessors // `composeGetters<R>(x, &MyStruct::getFoo, &MyOtherStruct::getBar)` translates to diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index ed1da3d25..4266b46f9 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -3862,6 +3862,20 @@ struct ExprLoweringVisitorBase : public ExprVisitor<Derived, LoweredValInfo> } return builder->emitSPIRVAsmOperandInst(i); } + case SPIRVAsmOperand::SampledType: + { + IRType* i; + { + IRBuilderInsertLocScope insertScope(builder); + builder->setInsertBefore(spirvAsmInst); + i = lowerType(context, operand.type.type); + } + return builder->emitSPIRVAsmOperandSampledType(i); + } + case SPIRVAsmOperand::TruncateMarker: + { + return builder->emitSPIRVAsmOperandTruncate(); + } } SLANG_UNREACHABLE("Unhandled case in visitSPIRVAsmExpr"); }; diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp index c3eba8c58..306d2cbec 100644 --- a/source/slang/slang-parser.cpp +++ b/source/slang/slang-parser.cpp @@ -6310,6 +6310,19 @@ namespace Slang { return SPIRVAsmOperand{SPIRVAsmOperand::ResultMarker, parser->ReadToken()}; } + // The handy __sampledType function + if(AdvanceIf(parser, "__sampledType")) + { + parser->ReadToken(TokenType::LParent); + const auto typeExpr = parser->ParseType(); + parser->ReadMatchingToken(TokenType::RParent); + return SPIRVAsmOperand{SPIRVAsmOperand::SampledType, Token{}, typeExpr}; + } + // The pseudo-operand for component truncation + else if(parser->LookAheadToken("__truncate")) + { + return SPIRVAsmOperand{SPIRVAsmOperand::TruncateMarker, parser->ReadToken()}; + } else if 
(AdvanceIf(parser, "builtin")) { // reference to a builtin var. @@ -6325,7 +6338,6 @@ namespace Slang { return SPIRVAsmOperand{ SPIRVAsmOperand::GLSL450Set, parser->ReadToken() }; } - // A regular identifier else if(parser->LookAheadToken(TokenType::Identifier)) { @@ -6362,7 +6374,8 @@ namespace Slang // A $foo variable else if(AdvanceIf(parser, TokenType::Dollar)) { - return slangIdentOperand(SPIRVAsmOperand::SlangValue); + Expr* expr = parseAtomicExpr(parser); + return SPIRVAsmOperand{SPIRVAsmOperand::SlangValue, Token{}, expr}; } // A $$foo type else if(AdvanceIf(parser, TokenType::DollarDollar)) @@ -6465,7 +6478,7 @@ namespace Slang || resultOperand) { // Insert the LHS result-type operand - if(ret.operands.getCount() == opInfo->resultTypeIndex && resultTypeOperand) + if(opInfo && ret.operands.getCount() == opInfo->resultTypeIndex && resultTypeOperand) { ret.operands.add(*resultTypeOperand); resultTypeOperand.reset(); @@ -6473,14 +6486,14 @@ namespace Slang } // Insert the LHS result operand - if(ret.operands.getCount() == opInfo->resultIdIndex && resultOperand) + if(opInfo && ret.operands.getCount() == opInfo->resultIdIndex && resultOperand) { ret.operands.add(*resultOperand); resultOperand.reset(); continue; } - if(ret.operands.getCount() == opInfo->maxOperandCount) + if(opInfo && ret.operands.getCount() == opInfo->maxOperandCount) { parser->diagnose( parser->tokenReader.peekLoc(), diff --git a/source/slang/slang-stdlib-textures.cpp b/source/slang/slang-stdlib-textures.cpp new file mode 100644 index 000000000..e5af1367f --- /dev/null +++ b/source/slang/slang-stdlib-textures.cpp @@ -0,0 +1,1241 @@ +#include "slang-stdlib-textures.h" + +#define EMIT_LINE_DIRECTIVE() sb << "#line " << (__LINE__+1) << " \"slang-stdlib-textures.cpp\"\n" + +namespace Slang +{ + +// Concatenate anything which can be passed to a StringBuilder +template<typename... Ts> +String cat(const Ts&... xs) +{ + return (StringBuilder{} << ... 
<< xs); +}; + +// +// Utilities +// + +const auto indentWidth = 4; +static const char spaces[] = " "; +static_assert(SLANG_COUNT_OF(spaces) % indentWidth == 1); + +struct BraceScope +{ + BraceScope(const char*& i, StringBuilder& sb, const char* end = "\n") + :i(i), sb(sb), end(end) + { + // If we hit this assert, it means that we are indenting too deep and + // need more spaces in 'spaces' above. + SLANG_ASSERT(i != spaces); + sb << i << "{\n"; + i -= indentWidth; + } + ~BraceScope() + { + // If we hit this assert, it means that we've got a bug unindenting + // more than we've indented. + SLANG_ASSERT(*i != '\0'); + i += indentWidth; + sb << i << "}" << end; + } + const char*& i; + StringBuilder& sb; + const char* end; +}; + +TextureTypeInfo::TextureTypeInfo( + TextureTypePrefixInfo const& prefixInfo, + BaseTextureShapeInfo const& base, + bool isArray, + bool isMultisample, + BaseTextureAccessInfo const& accessInfo, + StringBuilder& inSB, + String const& inPath) + : prefixInfo(prefixInfo) + , base(base) + , isArray(isArray) + , isMultisample(isMultisample) + , accessInfo(accessInfo) + , sb(inSB) + , path(inPath) +{ + i = spaces + SLANG_COUNT_OF(spaces) - 1; +} + +void TextureTypeInfo::writeFuncBody( + const char* funcName, + const String& glsl, + const String& cuda, + const String& spirv) +{ + BraceScope funcScope{i, sb}; + { + sb << i << "__target_switch\n"; + BraceScope switchScope{i, sb}; + sb << i << "case cpp:\n"; + sb << i << "case hlsl:\n"; + sb << i << "__intrinsic_asm \"." 
<< funcName << "\";\n"; + if(glsl.getLength()) + { + sb << i << "case glsl:\n"; + sb << i << "__intrinsic_asm \"" << glsl << "\";\n"; + } + if(cuda.getLength()) + { + sb << i << "case cuda:\n"; + sb << i << "__intrinsic_asm \"" << cuda << "\";\n"; + } + if(spirv.getLength()) + { + sb << i << "case spirv:\n"; + sb << i << "return spirv_asm\n"; + BraceScope spirvScope{i, sb, ";\n"}; + sb << spirv << "\n"; + } + } +} + +void TextureTypeInfo::writeFuncDecorations( + const String& glsl, + const String& cuda) +{ + if(glsl.getLength()) + sb << i << "__target_intrinsic(glsl, \"" << glsl << "\")\n"; + if(cuda.getLength()) + sb << i << "__target_intrinsic(cuda, \"" << cuda << "\")\n"; +} + +void TextureTypeInfo::writeFuncWithSig( + const char* funcName, + const String& sig, + const String& glsl, + const String& spirv, + const String& cuda, + const ReadNoneMode readNoneMode) +{ + const bool isReadOnly = (accessInfo.access == SLANG_RESOURCE_ACCESS_READ); + const bool rn = + readNoneMode == ReadNoneMode::Always + || readNoneMode == ReadNoneMode::IfReadOnly && isReadOnly; + if(spirv.getLength()) + { + if(rn) + sb << i << "[__readNone]\n"; + sb << i << sig << "\n"; + writeFuncBody(funcName, glsl, cuda, spirv); + } + else + { + writeFuncDecorations(glsl, cuda); + if(rn) + sb << i << "[__readNone]\n"; + sb << i << sig << ";\n"; + } + sb << "\n"; +} + +void TextureTypeInfo::writeFunc( + const char* returnType, + const char* funcName, + const String& params, + const String& glsl, + const String& spirv, + const String& cuda, + const ReadNoneMode readNoneMode) +{ + writeFuncWithSig( + funcName, + cat(returnType, " ", funcName, "(", params, ")"), + glsl, + spirv, + cuda, + readNoneMode + ); +} + +void TextureTypeInfo::emitTypeDecl() +{ + char const* baseName = prefixInfo.name; + char const* baseShapeName = base.shapeName; + TextureFlavor::Shape baseShape = base.baseShape; + + // Arrays of 3D textures aren't allowed + if (isArray && baseShape == TextureFlavor::Shape::Shape3D) return; + + 
auto access = accessInfo.access; + + // No such thing as RWTextureCube + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE && baseShape == TextureFlavor::Shape::ShapeCube) + { + return; + } + + // TODO: any constraints to enforce on what gets to be multisampled? + + unsigned flavor = baseShape; + if (isArray) flavor |= TextureFlavor::ArrayFlag; + if (isMultisample) flavor |= TextureFlavor::MultisampleFlag; + // if (isShadow) flavor |= TextureFlavor::ShadowFlag; + + flavor |= (access << 8); + + // emit a generic signature + sb << "__generic<T = float4"; + // Multi-sample rw texture types have an optional sampleCount parameter. + if (isMultisample) + sb << ", let sampleCount : int = 0"; + sb << ">"; + + if(prefixInfo.combined) + { + sb << "__magic_type(TextureSamplerType," << int(flavor) << ")\n"; + sb << "__intrinsic_type(" << (kIROp_TextureSamplerType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; + } + else + { + sb << "__magic_type(TextureType," << int(flavor) << ")\n"; + sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; + } + sb << "struct "; + sb << accessInfo.name; + sb << baseName; + sb << baseShapeName; + if (isMultisample) sb << "MS"; + if (isArray) sb << "Array"; + // if (isShadow) sb << "Shadow"; + sb << "\n"; + + // The struct body + { + BraceScope structBodyScope{i, sb, ";\n"}; + + writeQueryFunctions(); + + if(baseShape != TextureFlavor::Shape::ShapeCube) + writeSubscriptFunctions(); + + if( !isMultisample ) + writeSampleFunctions(); + } + + writeGatherExtensions(); +} // TextureTypeInfo::emitTypeDecl + +void TextureTypeInfo::writeQueryFunctions() +{ + static const char* kComponentNames[]{ "x", "y", "z", "w" }; + + TextureFlavor::Shape baseShape = base.baseShape; + + char const* samplerStateParam = prefixInfo.combined ? 
"" : "SamplerState s, "; + auto access = accessInfo.access; + + if( !isMultisample ) + { + writeFunc( + "float", + "CalculateLevelOfDetail", + cat(samplerStateParam, "float", base.coordCount, " location"), + cat("textureQueryLod($p, $2).x"), + "", + "", + ReadNoneMode::Never + ); + + writeFunc( + "float", + "CalculateLevelOfDetailUnclamped", + cat(samplerStateParam, "float", base.coordCount, " location"), + cat("textureQueryLod($p, $2).y"), + "", + "", + ReadNoneMode::Never + ); + } + + // `GetDimensions` + const char* dimParamTypes[] = {"out float ", "out int ", "out uint "}; + for(auto t : dimParamTypes) + for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo) + { + StringBuilder glsl; + { + + glsl << "("; + + int aa = 1; + String lodStr = ", 0"; + if (includeMipInfo) + { + int mipLevelArg = aa++; + lodStr = ", int($"; + lodStr.append(mipLevelArg); + lodStr.append(")"); + } + + String opStr = " = textureSize($0" + lodStr; + switch( access ) + { + case SLANG_RESOURCE_ACCESS_READ_WRITE: + case SLANG_RESOURCE_ACCESS_RASTER_ORDERED: + opStr = " = imageSize($0"; + break; + + default: + break; + } + + + int cc = 0; + switch(baseShape) + { + case TextureFlavor::Shape::Shape1D: + glsl << "($" << aa++ << opStr << ")"; + if (isArray) + { + glsl << ".x"; + } + glsl << ")"; + cc = 1; + break; + + case TextureFlavor::Shape::Shape2D: + case TextureFlavor::Shape::ShapeCube: + glsl << "($" << aa++ << opStr << ").x)"; + glsl << ", ($" << aa++ << opStr << ").y)"; + cc = 2; + break; + + case TextureFlavor::Shape::Shape3D: + glsl << "($" << aa++ << opStr << ").x)"; + glsl << ", ($" << aa++ << opStr << ").y)"; + glsl << ", ($" << aa++ << opStr << ").z)"; + cc = 3; + break; + + default: + SLANG_UNEXPECTED("unhandled resource shape"); + break; + } + + if(isArray) + { + glsl << ", ($" << aa++ << opStr << ")." 
<< kComponentNames[cc] << ")"; + } + + if(isMultisample) + { + glsl << ", ($" << aa++ << " = textureSamples($0))"; + } + + if (includeMipInfo) + { + glsl << ", ($" << aa++ << " = textureQueryLevels($0))"; + } + + + glsl << ")"; + } + + StringBuilder params; + if(includeMipInfo) + params << "uint mipLevel, "; + + switch(baseShape) + { + case TextureFlavor::Shape::Shape1D: + params << t << "width"; + break; + + case TextureFlavor::Shape::Shape2D: + case TextureFlavor::Shape::ShapeCube: + params << t << "width,"; + params << t << "height"; + break; + + case TextureFlavor::Shape::Shape3D: + params << t << "width,"; + params << t << "height,"; + params << t << "depth"; + break; + + default: + assert(!"unexpected"); + break; + } + + if(isArray) + { + params << ", " << t << "elements"; + } + + if(isMultisample) + { + params << ", " << t << "sampleCount"; + } + + if(includeMipInfo) + params << ", " << t << "numberOfLevels"; + + sb << " __glsl_version(450)\n"; + sb << " __glsl_extension(GL_EXT_samplerless_texture_functions)\n"; + writeFunc( + "void", + "GetDimensions", + params, + glsl, + "", + "", + ReadNoneMode::Always); + } + + // `GetSamplePosition()` + if( isMultisample ) + { + writeFunc("float2", "GetSamplePosition", "int s", "", "", "", ReadNoneMode::Never); + } + + // `Load()` + + if( base.coordCount + isArray < 4 ) + { + // The `Load()` operation on an ordinary `Texture2D` takes + // an `int3` for the location, where `.xy` holds the texel + // coordinates, and `.z` holds the mip level to use. + // + // The third coordinate for mip level is absent in + // `Texture2DMS.Load()` and `RWTexture2D.Load`. This pattern + // is repeated for all the other texture shapes. + // + bool needsMipLevel = !isMultisample && (access == SLANG_RESOURCE_ACCESS_READ); + + int loadCoordCount = base.coordCount + isArray + (needsMipLevel?1:0); + + char const* glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? 
"texelFetch" : "imageLoad"; + + // When translating to GLSL, we need to break apart the `location` argument. + // + // TODO: this should realy be handled by having this member actually get lowered! + static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" }; + static const char* kGLSLLoadLODSwizzle[] = { "", "", "y", "z", "w", "error" }; + + // TODO: The GLSL translations here only handle the read-only texture + // cases (stuff that lowers to `texture*` in GLSL) and not the stuff + // that lowers to `image*`. + // + // At some point it may make sense to separate the read-only and + // `RW`/`RasterizerOrdered` cases here rather than try to share code. + + // CUDA + StringBuilder cudaBuilder; + if(!isMultisample) + { + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) + { + const int coordCount = base.coordCount; + const int vecCount = coordCount + int(isArray); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + cudaBuilder << "surf" << coordCount << "D"; + if (isArray) + { + cudaBuilder << "Layered"; + } + cudaBuilder << "read"; + cudaBuilder << "<$T0>($0"; + for (int j = 0; j < coordCount; ++j) + { + cudaBuilder << ", ($1)"; + if (vecCount > 1) + { + cudaBuilder << '.' << char(j + 'x'); + } + + // Surface access is *byte* addressed in x in CUDA + if (j == 0) + { + cudaBuilder << " * $E"; + } + } + if (isArray) + { + cudaBuilder << ", int(($1)." 
<< char(coordCount + 'x') << ")"; + } + cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)"; + } + else + { + // `cudaBuilder` must hold only the intrinsic text: it is passed to + // writeFunc as the `cuda` string, which already gets wrapped in + // `__target_intrinsic(cuda, "...")` when the declaration is emitted. + cudaBuilder << "surfCubemap"; + if (isArray) + { + cudaBuilder << "Layered"; + } + cudaBuilder << "read"; + + // Surface access is *byte* addressed in x in CUDA + cudaBuilder << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z"; + if (isArray) + { + cudaBuilder << ", int(($1).w)"; + } + cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)"; + } + } + else if (access == SLANG_RESOURCE_ACCESS_READ) + { + // We can allow this on Texture1D + if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) + { + cudaBuilder << "tex1Dfetch<$T0>($0, ($1).x)"; + } + } + } + + // `Load(location)`; terminate the extension modifier with a newline like the other emitters do. + sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n"; + writeFunc( + "T", + "Load", + cat("int", loadCoordCount, " location", isMultisample ? ", int sampleIndex" : ""), + isMultisample ? cat("$c", glslFuncName, "($0, $1, $2)$z") + : needsMipLevel ? cat( + "$c", + glslFuncName, + "($0, ($1).", + kGLSLLoadCoordsSwizzle[loadCoordCount], + ", ($1).", + kGLSLLoadLODSwizzle[loadCoordCount], + ")$z") + : cat("$c", glslFuncName, "($0, $1)$z"), + "", + cudaBuilder + ); + + // `Load(location, offset)` + glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetchOffset" : "imageLoad"; + sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n"; + writeFunc( + "T", + "Load", + cat( + "int", loadCoordCount, " location", + isMultisample ? ", int sampleIndex" : "", + ", constexpr int", base.coordCount, " offset" + ), + isMultisample ? cat("$c", glslFuncName, "($0, $0, $1, $2)$z") + : needsMipLevel ? cat( + "$c", glslFuncName, "($0, ($1).", kGLSLLoadCoordsSwizzle[loadCoordCount], + ", ($1).", kGLSLLoadLODSwizzle[loadCoordCount], + ", $2)$z") + : cat("$c", glslFuncName, "($0, $1, 0, $2)$z") + ); + + // `Load(location, offset, out status)`: no target intrinsics are supplied for this overload. + writeFunc( + "T", + "Load", + cat( + "int", loadCoordCount, " location", + isMultisample ? 
", int sampleIndex" : "", + ", constexpr int", base.coordCount, " offset", + ", out uint status" + ) + ); + } +} + +static String spirvReadIntrinsic() +{ + StringBuilder spirvBuilder; + const char* i = " "; + spirvBuilder << i << "%sampled : __sampledType(T) = OpImageRead $this $location;\n"; + spirvBuilder << i << "__truncate $$T result __sampledType(T) %sampled;"; + return spirvBuilder; +} + +static String spirvWriteIntrinsic() +{ + StringBuilder spirvBuilder; + const char* i = " "; + spirvBuilder << i << "OpImageWrite $this $location $newValue;"; + return spirvBuilder; +} + +void TextureTypeInfo::writeSubscriptFunctions() +{ + TextureFlavor::Shape baseShape = base.baseShape; + auto access = accessInfo.access; + + int N = base.coordCount + isArray; + + char const* uintNs[] = { "", "uint", "uint2", "uint3", "uint4" }; + char const* ivecNs[] = { "", "int", "ivec2", "ivec3", "ivec4" }; + + auto uintN = uintNs[N]; + auto ivecN = ivecNs[N]; + + // subscript operator + sb << i << "__subscript(" << uintN << " location) -> T\n"; + BraceScope subscriptScope{i, sb}; + + // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!! + + // GLSL/SPIR-V distinguishes sampled vs. non-sampled images + StringBuilder glslBuilder; + { + switch( access ) + { + case SLANG_RESOURCE_ACCESS_NONE: + case SLANG_RESOURCE_ACCESS_READ: + sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n"; + glslBuilder << "$ctexelFetch($0, " << ivecN << "($1)"; + if( !isMultisample ) + { + glslBuilder << ", 0"; + } + else + { + // TODO: how to handle passing through sample index? + glslBuilder << ", 0"; + } + break; + + default: + glslBuilder << "$cimageLoad($0, " << ivecN << "($1)"; + if( isMultisample ) + { + // TODO: how to handle passing through sample index? 
+ glslBuilder << ", 0"; + } + break; + } + glslBuilder << ")$z"; + } + + // CUDA + StringBuilder cudaBuilder; + { + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) + { + const int coordCount = base.coordCount; + const int vecCount = coordCount + int(isArray); + + cudaBuilder << "surf"; + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + cudaBuilder << coordCount << "D"; + } + else + { + cudaBuilder << "Cubemap"; + } + + cudaBuilder << (isArray ? "Layered" : ""); + cudaBuilder << "read$C<$T0>($0"; + + for (int j = 0; j < vecCount; ++j) + { + cudaBuilder << ", ($1)"; + if (vecCount > 1) + { + cudaBuilder << '.' << char(j + 'x'); + } + // Surface access is *byte* addressed in x in CUDA + if (j == 0) + { + cudaBuilder << " * $E"; + } + } + + cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)"; + } + else if (access == SLANG_RESOURCE_ACCESS_READ) + { + // We can allow this on Texture1D + if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) + { + cudaBuilder << "tex1Dfetch<$T0>($0, $1)"; + } + } + } + + // Output that has get + writeFuncWithSig( + "operator[]", + "get", + glslBuilder, + spirvReadIntrinsic(), + cudaBuilder + ); + + // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!! + + if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ)) + { + // CUDA + cudaBuilder.clear(); + { + const int coordCount = base.coordCount; + const int vecCount = coordCount + int(isArray); + + cudaBuilder << "surf"; + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + cudaBuilder << coordCount << "D"; + } + else + { + cudaBuilder << "Cubemap"; + } + + cudaBuilder << (isArray ? "Layered" : ""); + cudaBuilder << "write$C<$T0>($2, $0"; + for (int j = 0; j < vecCount; ++j) + { + cudaBuilder << ", ($1)"; + if (vecCount > 1) + { + cudaBuilder << '.' 
<< char(j + 'x'); + } + + // Surface access is *byte* addressed in x in CUDA + if (j == 0) + { + cudaBuilder << " * $E"; + } + } + + cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)"; + } + + // Set + sb << i << "[nonmutating]\n"; + writeFuncWithSig( + "operator[]", + "set(T newValue)", + cat("imageStore($0, ", ivecN, "($1), $V2)"), + spirvWriteIntrinsic(), + cudaBuilder + ); + } + + // !!!!!!!!!!!!!!!!!! ref !!!!!!!!!!!!!!!!!!!!!!!!! + + // Depending on the access level of the texture type, + // we either have just a getter (the default), or both + // a getter and setter. + switch( access ) + { + case SLANG_RESOURCE_ACCESS_NONE: + case SLANG_RESOURCE_ACCESS_READ: + break; + default: + sb << i << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n"; + break; + } +} + +static String cudaSampleIntrinsic(const bool isArray, const BaseTextureShapeInfo& base, bool sampleLevel) +{ + StringBuilder cudaBuilder; + + TextureFlavor::Shape baseShape = base.baseShape; + const int coordCount = base.coordCount; + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + cudaBuilder << "tex" << coordCount << "D"; + if (isArray) + cudaBuilder << "Layered"; + if(sampleLevel) + cudaBuilder << "Lod"; + cudaBuilder << "<$T0>($0"; + for (int i = 0; i < coordCount; ++i) + { + cudaBuilder << ", ($2)"; + cudaBuilder << '.' << "xyzw"[i]; + } + if (isArray) + cudaBuilder << ", int(($2)." 
<< char(coordCount + 'x') << ")"; + if(sampleLevel) + cudaBuilder << ", $3"; + cudaBuilder << ")"; + } + else + { + cudaBuilder << "texCubemap"; + if (isArray) + cudaBuilder << "Layered"; + if(sampleLevel) + cudaBuilder << "Lod"; + cudaBuilder << "<$T0>($0, ($2).x, ($2).y, ($2).z"; + if (isArray) + cudaBuilder << ", int(($2).w)"; + if(sampleLevel) + cudaBuilder << ", $3"; + cudaBuilder << ")"; + } + + return cudaBuilder; +} + +const char* noBias = nullptr; +const char* noLodLevel = nullptr; +const char* noGradX = nullptr; +const char* noGradY = nullptr; +const char* noConstOffset = nullptr; +const char* noMinLod = nullptr; + +static String spirvSampleIntrinsic( + const TextureTypePrefixInfo& prefixInfo, + const char* bias = nullptr, + const char* lodLevel = nullptr, + const char* gradX = nullptr, + const char* gradY = nullptr, + const char* constOffset = nullptr, + const char* minLod = nullptr) +{ + StringBuilder spirvBuilder; + const char* i = " "; + + SLANG_ASSERT(!(!gradX ^ !gradY)); + + if(minLod) + spirvBuilder << i << "OpCapability MinLod;\n"; + + const char* sampledImage; + if(prefixInfo.combined) + { + sampledImage = "$this"; + } + else + { + const char* sampledImageType = "%sampledImageType"; + sampledImage = "%sampledImage"; + spirvBuilder << i << sampledImageType << " = OpTypeSampledImage $$This;\n"; + spirvBuilder << i << sampledImage << " : " << sampledImageType << " = OpSampledImage $this $s;\n"; + } + + const char* op = lodLevel || gradX ? 
"OpImageSampleExplicitLod" : "OpImageSampleImplicitLod"; + spirvBuilder << i << "%sampled : __sampledType(T) = " << op << " " << sampledImage << " $location"; + spirvBuilder << " None"; + if(bias) + spirvBuilder << "|Bias"; + if(lodLevel) + spirvBuilder << "|Lod"; + if(gradX) + spirvBuilder << "|Grad"; + if(constOffset) + spirvBuilder << "|ConstOffset"; + if(minLod) + spirvBuilder << "|MinLod"; + + if(bias) + spirvBuilder << " $" << bias; + if(lodLevel) + spirvBuilder << " $" << lodLevel; + if(gradX) + spirvBuilder << " $" << gradX << " $" << gradY; + if(constOffset) + spirvBuilder << " $" << constOffset; + if(minLod) + spirvBuilder << " $" << minLod; + spirvBuilder << ";\n"; + spirvBuilder << i << "__truncate $$T result __sampledType(T) %sampled;\n"; + return spirvBuilder; +} + +void TextureTypeInfo::writeSampleFunctions() +{ + TextureFlavor::Shape baseShape = base.baseShape; + char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, "; + + // `Sample()` + + writeFunc( + "T", + "Sample", + cat(samplerStateParam, "float", base.coordCount + isArray, " location"), + "$ctexture($p, $2)$z", + spirvSampleIntrinsic(prefixInfo), + cudaSampleIntrinsic(isArray, base, false) + ); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + writeFunc( + "T", + "Sample", + cat(samplerStateParam, "float", base.coordCount + isArray, " location, ", "constexpr int", base.coordCount, " offset"), + "$ctextureOffset($p, $2, $3)$z", + spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, noGradX, noGradY, "offset") + ); + } + + writeFunc( + "T", + "Sample", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + baseShape == TextureFlavor::Shape::ShapeCube ? "" : cat("constexpr int", base.coordCount, " offset, "), + "float clamp" + ), + "", + spirvSampleIntrinsic( + prefixInfo, + noBias, + noLodLevel, + noGradX, + noGradY, + baseShape == TextureFlavor::Shape::ShapeCube ? 
nullptr : "offset", + "clamp" + ) + ); + + // SPIR-V todo, use OpImageSparseSampleImplicitLod + writeFunc( + "T", + "Sample", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + baseShape != TextureFlavor::Shape::ShapeCube ? cat("constexpr int", base.coordCount, " offset, ") : "", + "float clamp, out uint status" + ) + ); + + writeFunc( + "T", + "SampleBias", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float bias" + ), + "$ctexture($p, $2, $3)$z", + spirvSampleIntrinsic(prefixInfo, "bias") + ); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + writeFunc( + "T", + "SampleBias", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float bias, ", + "constexpr int", base.coordCount, " offset" + ), + "$ctextureOffset($p, $2, $3, $4)$z", + spirvSampleIntrinsic(prefixInfo, "bias", noLodLevel, noGradX, noGradY, "offset") + ); + } + int baseCoordCount = base.coordCount; + int arrCoordCount = baseCoordCount + isArray; + if (arrCoordCount <= 3) + { + // `SampleCmp()` and `SampleCmpLevelZero` + + writeFunc( + "float", + "SampleCmp", + cat( + "SamplerComparisonState s, ", + "float", base.coordCount + isArray, " location, ", + "float compareValue" + ), + cat("texture($p, vec", arrCoordCount + 1, "($2, $3))") + ); + + writeFunc( + "float", + "SampleCmpLevelZero", + cat( + "SamplerComparisonState s, ", + "float", base.coordCount + isArray, " location, ", + "float compareValue" + ), + cat("textureLod($p, vec", arrCoordCount + 1, "($2, $3), 0)") + ); + } + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + // Note(tfoley): MSDN seems confused, and claims that the `offset` + // parameter for `SampleCmp` is available for everything but 3D + // textures, while `Sample` and `SampleBias` are consistent in + // saying they only exclude `offset` for cube maps (which makes + // sense). I'm going to assume the documentation for `SampleCmp` + // is just wrong. 
+ writeFunc( + "float", + "SampleCmp", + cat( + "SamplerComparisonState s, ", + "float", base.coordCount + isArray, " location, ", + "float compareValue, " + "constexpr int", base.coordCount, " offset" + ), + cat("textureOffset($p, vec", arrCoordCount + 1, "($2, $3), $4)") + ); + + writeFunc( + "float", + "SampleCmpLevelZero", + cat( + "SamplerComparisonState s, ", + "float", base.coordCount + isArray, " location, ", + "float compareValue, " + "constexpr int", base.coordCount, " offset" + ), + cat("textureLodOffset($p, vec", arrCoordCount + 1, "($2, $3), 0, $4)") + ); + } + + // TODO(JS): Not clear how to map this to CUDA, because in HLSL, the gradient is a vector based on + // the dimension. On CUDA there is texNDGrad, but it always just takes ddx, ddy. + // I could just assume 0 for elements not supplied, and ignore z. For now will just leave + // + // `gradY` is the last parameter of this overload, so it must not be followed by a separator. + writeFunc( + "T", + "SampleGrad", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float", base.coordCount, " gradX, ", + "float", base.coordCount, " gradY" + ), + "$ctextureGrad($p, $2, $3, $4)$z", + spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY") + ); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + writeFunc( + "T", + "SampleGrad", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float", base.coordCount, " gradX, ", + "float", base.coordCount, " gradY, ", + "constexpr int", base.coordCount, " offset " + ), + "$ctextureGradOffset($p, $2, $3, $4, $5)$z", + spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY", "offset") + ); + + sb << i << "__glsl_extension(GL_ARB_sparse_texture_clamp)\n"; + writeFunc( + "T", + "SampleGrad", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float", base.coordCount, " gradX, ", + "float", base.coordCount, " gradY, ", + "constexpr int", base.coordCount, " offset, ", + "float lodClamp" + ), + "$ctextureGradOffsetClampARB($p, $2, $3, $4, 
$5, $6)$z", + spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY", "offset", "lodClamp") + ); + } + + // `SampleLevel` + + writeFunc( + "T", + "SampleLevel", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float level" + ), + "$ctextureLod($p, $2, $3)$z", + spirvSampleIntrinsic(prefixInfo, noBias, "level"), + cudaSampleIntrinsic(isArray, base, true) + ); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + writeFunc( + "T", + "SampleLevel", + cat( + samplerStateParam, + "float", base.coordCount + isArray, " location, ", + "float level, ", + "constexpr int", base.coordCount, " offset" + ), + "$ctextureLodOffset($p, $2, $3, $4)$z", + spirvSampleIntrinsic(prefixInfo, noBias, "level", noGradX, noGradY, "offset") + ); + } +} + +void TextureTypeInfo::writeGatherExtensions() +{ + char const* baseName = prefixInfo.name; + char const* baseShapeName = base.shapeName; + + auto access = accessInfo.access; + + bool isReadOnly = (access == SLANG_RESOURCE_ACCESS_READ); + + char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, "; + + // `Gather*()` operations are handled via an `extension` declaration, + // because this lets us capture the element type of the texture. + // + // TODO: longer-term there should be something like a `TextureElementType` + // interface, that both scalars and vectors implement, that then exposes + // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`. 
+ // + static const struct { + char const* genericPrefix; + char const* elementType; + char const* outputType; + } kGatherExtensionCases[] = { + { "__generic<T, let N : int>", "vector<T,N>", "vector<T, 4>" }, + { "", "float", "vector<float, 4>" }, + { "", "int" , "vector<int, 4>"}, + { "", "uint", "vector<uint, 4>"}, + + // TODO: need a case here for scalars `T`, but also + // need to ensure that case doesn't accidentally match + // for `T = vector<...>`, which requires actual checking + // of constraints on generic parameters. + }; + for(auto cc : kGatherExtensionCases) + { + // TODO: this should really be an `if` around the entire `Gather` logic + if (isMultisample) break; + + EMIT_LINE_DIRECTIVE(); + sb << cc.genericPrefix << " __extension "; + sb << accessInfo.name; + sb << baseName; + sb << baseShapeName; + if (isArray) sb << "Array"; + sb << "<" << cc.elementType << " >"; + sb << "\n{\n"; + + // `Gather` + // (tricky because it returns a 4-vector of the element type + // of the texture components...) + // + // TODO: is it actually correct to restrict these so that, e.g., + // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because + // it nominally doesn't have an alpha component? + static const struct { + int componentIndex; + char const* componentName; + } kGatherComponets[] = { + { 0, "" }, + { 0, "Red" }, + { 1, "Green" }, + { 2, "Blue" }, + { 3, "Alpha" }, + }; + enum Cmp + { NotCmp, + Cmp + }; + + for(auto cmp : {NotCmp, Cmp}) + for(auto kk : kGatherComponets) + { + auto samplerOrComparisonSampler = cmp == Cmp ? "SamplerComparisonState s, " : samplerStateParam; + + auto componentIndex = kk.componentIndex; + auto componentName = kk.componentName; + + auto outputType = cc.outputType; + + const auto cmpName = cmp == Cmp ? "Cmp" : ""; + const auto cmpValueParam = cmp == Cmp ? "float compareValue, " : ""; + const auto cmpValueParamEnd = cmp == Cmp ? 
", float compareValue" : ""; + const auto supportsGLSL = componentIndex == 0 || cmp == NotCmp; + + EMIT_LINE_DIRECTIVE(); + + if(supportsGLSL) + { + if(cmp == Cmp) + sb << "__target_intrinsic(glsl, \"textureGather($p, $2, $3)\")\n"; + else + sb << "__target_intrinsic(glsl, \"textureGather($p, $2, " << componentIndex << ")\")\n"; + } + if (base.coordCount == 2 && cmp == NotCmp) + { + // Gather only works on 2D in CUDA without comparison + // "It is based on the base type of DataType except when readMode is equal to cudaReadModeNormalizedFloat (see Texture Reference API), in which case it is always float4." + sb << "__target_intrinsic(cuda, \"tex2Dgather<$T0>($0, ($2).x, ($2).y, " << componentIndex << ")\")\n"; + } + if (isReadOnly) + sb << "[__readNone]\n"; + sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; + sb << "float" << base.coordCount + isArray << " location" << cmpValueParamEnd << ");\n"; + + if (isReadOnly) + sb << "[__readNone]\n"; + EMIT_LINE_DIRECTIVE(); + if(supportsGLSL) + { + if(cmp == Cmp) + sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, $4)\")\n"; + else + sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, " << componentIndex << ")\")\n"; + } + sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; + sb << "float" << base.coordCount + isArray << " location, "; + sb << cmpValueParam; + sb << "constexpr int" << base.coordCount << " offset);\n"; + + if (isReadOnly) + sb << "[__readNone]\n"; + EMIT_LINE_DIRECTIVE(); + sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; + sb << "float" << base.coordCount + isArray << " location, "; + sb << cmpValueParam; + sb << "constexpr int" << base.coordCount << " offset, "; + sb << "out uint status);\n"; + + if (isReadOnly) + sb << "[__readNone]\n"; + EMIT_LINE_DIRECTIVE(); + if(supportsGLSL) + { + if(cmp == Cmp) + sb << "__target_intrinsic(glsl, 
\"textureGatherOffsets($p, $2, $3, ivec" << base.coordCount << "[]($4, $5, $6, $7))\")\n"; + else + sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, ivec" << base.coordCount << "[]($3, $4, $5, $6), " << componentIndex << ")\")\n"; + } + sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; + sb << "float" << base.coordCount + isArray << " location, "; + sb << cmpValueParam; + sb << "int" << base.coordCount << " offset1, "; + sb << "int" << base.coordCount << " offset2, "; + sb << "int" << base.coordCount << " offset3, "; + sb << "int" << base.coordCount << " offset4);\n"; + + if (isReadOnly) + sb << "[__readNone]\n"; + EMIT_LINE_DIRECTIVE(); + sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler; + sb << "float" << base.coordCount + isArray << " location, "; + sb << cmpValueParam; + sb << "int" << base.coordCount << " offset1, "; + sb << "int" << base.coordCount << " offset2, "; + sb << "int" << base.coordCount << " offset3, "; + sb << "int" << base.coordCount << " offset4, "; + sb << "out uint status);\n"; + } + + EMIT_LINE_DIRECTIVE(); + sb << "\n}\n"; + } +} + +} diff --git a/source/slang/slang-stdlib-textures.h b/source/slang/slang-stdlib-textures.h new file mode 100644 index 000000000..19008a3f0 --- /dev/null +++ b/source/slang/slang-stdlib-textures.h @@ -0,0 +1,112 @@ +#pragma once + +#include "slang-ir.h" +#include "slang-type-system-shared.h" +#include "../core/slang-string.h" + +namespace Slang +{ + +static const struct BaseTextureShapeInfo { + char const* shapeName; + TextureFlavor::Shape baseShape; + int coordCount; +} kBaseTextureShapes[] = { + { "1D", TextureFlavor::Shape::Shape1D, 1 }, + { "2D", TextureFlavor::Shape::Shape2D, 2 }, + { "3D", TextureFlavor::Shape::Shape3D, 3 }, + { "Cube", TextureFlavor::Shape::ShapeCube,3 }, +}; + +static const struct BaseTextureAccessInfo { + char const* name; + SlangResourceAccess access; +} kBaseTextureAccessLevels[] = 
{ + { "", SLANG_RESOURCE_ACCESS_READ }, + { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE }, + { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED }, +}; + +static const struct TextureTypePrefixInfo +{ + char const* name; + bool combined; +} kTexturePrefixes[] = +{ + { "Texture", false }, + { "Sampler", true }, +}; + +struct TextureTypeInfo +{ + TextureTypeInfo( + TextureTypePrefixInfo const& prefixInfo, + BaseTextureShapeInfo const& base, + bool isArray, + bool isMultisample, + BaseTextureAccessInfo const& accessInfo, + StringBuilder& inSB, + String const& inPath); + + TextureTypePrefixInfo const& prefixInfo; + BaseTextureShapeInfo const& base; + bool isArray; + bool isMultisample; + BaseTextureAccessInfo const& accessInfo; + StringBuilder& sb; + String path; + + void emitTypeDecl(); + +private: + // + // Functions for writing specific parts of a definition + // + void writeQueryFunctions(); + void writeSubscriptFunctions(); + void writeSampleFunctions(); + void writeGatherExtensions(); + + // + // More general utilities + // + enum class ReadNoneMode + { + Never, + IfReadOnly, + Always + }; + + void writeFuncBody( + const char* funcName, + const String& glsl, + const String& cuda, + const String& spirv + ); + void writeFuncDecorations( + const String& glsl, + const String& cuda + ); + void writeFuncWithSig( + const char* funcName, + const String& sig, + const String& glsl = String{}, + const String& spirv = String{}, + const String& cuda = String{}, + const ReadNoneMode readNoneMode = ReadNoneMode::IfReadOnly + ); + void writeFunc( + const char* returnType, + const char* funcName, + const String& params, + const String& glsl = String{}, + const String& spirv = String{}, + const String& cuda = String{}, + const ReadNoneMode readNoneMode = ReadNoneMode::IfReadOnly + ); + + // A pointer to a string representing the current level of indentation + const char* i; +}; + +} diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp index 
65d5cf758..5fec05c80 100644 --- a/source/slang/slang-stdlib.cpp +++ b/source/slang/slang-stdlib.cpp @@ -4,6 +4,7 @@ #include "slang-ir.h" #include "slang-syntax.h" #include "slang-ir-util.h" +#include "slang-stdlib-textures.h" #include "../core/slang-string-util.h" #define STRINGIZE(x) STRINGIZE2(x) diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl index fbeddb905..c00ed8fe3 100644 --- a/tests/bindings/glsl-parameter-blocks.slang.glsl +++ b/tests/bindings/glsl-parameter-blocks.slang.glsl @@ -1,50 +1,31 @@ #version 450 layout(row_major) uniform; layout(row_major) buffer; - -#line 3 "tests/bindings/glsl-parameter-blocks.slang" struct Test_0 { vec4 a_0; }; - -#line 7 layout(binding = 0) layout(std140) uniform _S1 { vec4 a_0; }gTest_0; - -#line 3 layout(binding = 1) uniform texture2D gTest_t_0; - -#line 1237 "core.meta.slang" layout(binding = 2) uniform sampler gTest_s_0; - -#line 89 "core" layout(location = 0) out vec4 _S2; - -#line 902 "core.meta.slang" layout(location = 0) in vec2 _S3; - -#line 12 "tests/bindings/glsl-parameter-blocks.slang" void main() { - vec4 _S4 = (texture(sampler2D(gTest_t_0,gTest_s_0), (_S3))); - -#line 14 - _S2 = gTest_0.a_0 + _S4; - -#line 14 + _S2 = gTest_0.a_0 + (texture(sampler2D(gTest_t_0,gTest_s_0), (_S3))); return; } diff --git a/tests/bugs/gh-941.slang.glsl b/tests/bugs/gh-941.slang.glsl index 4330ece53..111cdb33c 100644 --- a/tests/bugs/gh-941.slang.glsl +++ b/tests/bugs/gh-941.slang.glsl @@ -1,9 +1,7 @@ -//TEST_IGNORE_FILE: - #version 450 - #extension GL_EXT_nonuniform_qualifier : require - +layout(row_major) uniform; +layout(row_major) buffer; struct SLANG_ParameterGroup_C_0 { vec2 uv_0; @@ -11,15 +9,13 @@ struct SLANG_ParameterGroup_C_0 }; layout(binding = 2) -layout(std140) -uniform _S1 +layout(std140) uniform _S1 { vec2 uv_0; uint index_0; -} C_0; - +}C_0; layout(binding = 0) -uniform texture2D t_0[]; +uniform texture2D t_0[]; layout(binding = 1) uniform sampler s_0; 
@@ -29,11 +25,7 @@ out vec4 _S2; void main() { - vec4 _S3 = texture( - sampler2D( - t_0[C_0.index_0], - s_0), - C_0.uv_0); - _S2 = _S3; + _S2 = (texture(sampler2D(t_0[C_0.index_0],s_0), (C_0.uv_0))); return; -}
\ No newline at end of file +} + diff --git a/tests/compute/texture-simpler.slang b/tests/compute/texture-simpler.slang index ce4b76f77..18af1cd62 100644 --- a/tests/compute/texture-simpler.slang +++ b/tests/compute/texture-simpler.slang @@ -1,4 +1,9 @@ -//TEST(compute, vulkan):COMPARE_COMPUTE:-vk -compute -shaderobj -output-using-type +//TEST(smoke,compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -output-using-type +//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type +//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj -output-using-type +//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type +//TEST(smoke,compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type -render-feature hardware-device +//TEST(smoke,compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type //TEST_INPUT: Texture2D(size=4, content = one):name t2D Texture2D<float> t2D; diff --git a/tests/cross-compile/non-uniform-indexing.slang.glsl b/tests/cross-compile/non-uniform-indexing.slang.glsl index 96abe6bac..07e8f9e38 100644 --- a/tests/cross-compile/non-uniform-indexing.slang.glsl +++ b/tests/cross-compile/non-uniform-indexing.slang.glsl @@ -1,10 +1,9 @@ -//TEST_IGNORE_FILE #version 450 - #extension GL_EXT_nonuniform_qualifier : require - +layout(row_major) uniform; +layout(row_major) buffer; layout(binding = 0) -uniform texture2D t_0[10]; +uniform texture2D t_0[10]; layout(binding = 1) uniform sampler s_0; @@ -17,12 +16,7 @@ in vec3 _S2; void main() { - vec4 _S3 = texture( - sampler2D( - t_0[nonuniformEXT(int(_S2.z))], - s_0), - _S2.xy); - - _S1 = _S3; + _S1 = (texture(sampler2D(t_0[nonuniformEXT(int(_S2.z))],s_0), (_S2.xy))); return; } + diff --git a/tests/cross-compile/vk-texture-indexing.slang.glsl b/tests/cross-compile/vk-texture-indexing.slang.glsl index 73513c623..7fd768465 100644 --- 
a/tests/cross-compile/vk-texture-indexing.slang.glsl +++ b/tests/cross-compile/vk-texture-indexing.slang.glsl @@ -3,31 +3,23 @@ #extension GL_EXT_nonuniform_qualifier : require layout(row_major) uniform; layout(row_major) buffer; - layout(binding = 0) uniform texture2D gParams_textures_0[10]; - float fetchData_0(uvec2 coords_0, uint index_0) { - float _S1 = (texelFetch((gParams_textures_0[nonuniformEXT(index_0)]), ivec2((coords_0)), 0).x); - - return _S1; + return (texelFetch((gParams_textures_0[nonuniformEXT(index_0)]), ivec2((coords_0)), 0).x); } layout(location = 0) -out vec4 _S2; - +out vec4 _S1; flat layout(location = 0) -in uvec3 _S3; - +in uvec3 _S2; void main() { - - _S2 = vec4(fetchData_0(_S3.xy, _S3.z)); - + _S1 = vec4(fetchData_0(_S2.xy, _S2.z)); return; } diff --git a/tests/expected-failure.txt b/tests/expected-failure.txt index 3e158e2d1..df773a46b 100644 --- a/tests/expected-failure.txt +++ b/tests/expected-failure.txt @@ -2,13 +2,7 @@ tests/autodiff/global-param-hoisting.slang.1 (vk) tests/bugs/buffer-swizzle-store.slang.1 (vk) tests/bugs/gh-3075.slang.2 (vk) tests/bugs/ray-query-in-generic.slang.1 (vk) -tests/compute/half-rw-texture-convert.slang.4 (vk) -tests/compute/half-rw-texture-convert2.slang.4 (vk) tests/compute/ray-tracing-inline.slang.1 (vk) -tests/compute/rw-texture-simple.slang.4 (vk) -tests/compute/texture-sample-grad-offset-clamp.slang (vk) -tests/compute/texture-simple.slang.4 (vk) -tests/compute/texture-simpler.slang (vk) tests/language-feature/constants/constexpr-loop.slang.1 (vk) tests/optimization/func-resource-result/func-resource-result-complex.slang.1 (vk) tests/type/texture-sampler/texture-sampler-2d.slang (vk) diff --git a/tests/language-feature/1-vector.slang b/tests/language-feature/1-vector.slang new file mode 100644 index 000000000..bb8cedf3c --- /dev/null +++ b/tests/language-feature/1-vector.slang @@ -0,0 +1,19 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-shaderobj 
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -use-dxil -shaderobj +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -shaderobj +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj + +//TEST_INPUT:ubuffer(data=[1 2 3 4], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +// CHECK: 0 +// CHECK-NEXT: 1 +// CHECK-NEXT: 2 +// CHECK-NEXT: 3 + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + vector<int,1> i = vector<int,1>(dispatchThreadID.x); + outputBuffer[i.x] = i.x; +} diff --git a/tests/language-feature/spirv-asm/truncate.slang b/tests/language-feature/spirv-asm/truncate.slang new file mode 100644 index 000000000..9837890f5 --- /dev/null +++ b/tests/language-feature/spirv-asm/truncate.slang @@ -0,0 +1,62 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -emit-spirv-directly -output-using-type + +//TEST_INPUT:ubuffer(data=[1 2 3 4], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +// CHECK: 8 +// CHECK-NEXT: 13 +// CHECK-NEXT: 18 +// CHECK-NEXT: 23 + +// +// This test tests the __truncate operator +// +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int i = dispatchThreadID.x; + int n = outputBuffer[i]; + + int scalar = n; + + // 1-vectors are not valid in SPIR-V + // vector<int, 1> vector1 = vector<int, 1>(n); + + vector<int, 4> vector4 = n + vector<int, 4>(0,1,2,3); + //int expected = 0 + n + n + (n + (n+1) + (n+2)); + + int r = 0; + spirv_asm + { + // scalar to scalar + __truncate $$int %a1 $$int $scalar; + %r1 : $$int = OpIAdd %a1 $r; + + // scalar to 1-vector + // __truncate $$vector<int,1> %a2 $$int $scalar; + // %x1 : $$int = OpCompositeExtract %a2 0; + // %r2 : $$int = OpIAdd %x1 %r1; + %r2 : $$int = OpCopyObject %r1; + + // 1-vector to scalar + // __truncate $$int %a3 $$vector<int,1> $vector1; + // %r3 : $$int = OpIAdd %a3 %r2; + %r3 : $$int = 
OpCopyObject %r2; + + // n-vector to scalar + __truncate $$int %a4 $$vector<int,4> $vector4; + %r4 : $$int = OpIAdd %a4 %r3; + + // n-vector to m-vector + __truncate $$vector<int,3> %a5 $$vector<int,4> $vector4; + %x2 : $$int = OpCompositeExtract %a5 0; + %x3 : $$int = OpCompositeExtract %a5 1; + %x4 : $$int = OpCompositeExtract %a5 2; + %r5 : $$int = OpIAdd %x2 %r4; + %r6 : $$int = OpIAdd %x3 %r5; + %r7 : $$int = OpIAdd %x4 %r6; + + OpStore &r %r7 + }; + outputBuffer[i] = r; +} diff --git a/tests/legalization/vec1.slang b/tests/legalization/vec1.slang new file mode 100644 index 000000000..f3de085b0 --- /dev/null +++ b/tests/legalization/vec1.slang @@ -0,0 +1,93 @@ +//TEST(smoke,compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -output-using-type +//TEST(smoke,compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -emit-spirv-directly -output-using-type + +// CHECK: 23 +// CHECK-NEXT: 23 +// CHECK-NEXT: 23 +// CHECK-NEXT: 23 + +// This test tests that the 1-vector legalization works correctly. 
+ +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +// This struct helps test that nested access through 1-vectors works +struct V +{ + // 1-vector of 1-vector + vector<vector<float, 1>, 1> oo; + + // 1-vector of n-vector + vector<vector<float, 4>, 1> on; + + // n-vector of 1-vector + vector<vector<float, 1>, 4> no; +}; + +vector<int, 1> get1Vec(int x) +{ + return x; +} + +V getV() +{ + V v; + + // Test swizzle store + v.oo.x.x = 1; + + // Test assigning into subscript + v.on[0].wzyx = float4(4,3,2,1); + + // Test assigning from vector + v.no.x = vector<float, 1>(1); + + // Test assigning from scalar + v.no.y.x = 2; + + // Test assigning from vector of vector + v.no.wz = vector<vector<float, 1>, 2>(3,4); + + return v; +} + +float sumV(V v) +{ + return v.oo[0][0] + + v.on.x.x + + v.on.x.y + + v.on.x.z + + v.on.x.w + // Test arithmetic + + (v.no.x + v.no.y + v.no.z + v.no.w).x; +} + +float3 splat(vector<float, 1> v) +{ + // Test swizzle + return v.xxx; +} + +// This function helps test that this legalization happens with generic length +// vectors specialized to 1 +float triangle<let N : int>() +{ + vector<float, N> v; + for(int i = 0; i < N; ++i) + v[i] = i+1; + + float ret = 0; + for(int i = 0; i < N; ++i) + ret += v[i]; + return ret; +} + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const V v = getV(); + outputBuffer[dispatchThreadID.x] + = sumV(v) + + triangle<1>() + + splat(v.oo.x).z; +} diff --git a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl index 7f734bf75..744ee2f44 100644 --- a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl +++ b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl @@ -3,18 +3,12 @@ #extension GL_NV_ray_tracing_motion_blur : require layout(row_major) uniform; layout(row_major) buffer; - -#line 5 
"tests/nv-extensions/nv-ray-tracing-motion-blur.slang" layout(binding = 0) uniform texture2D samplerPosition_0; - -#line 7 layout(binding = 2) uniform sampler sampler_0; - -#line 6 layout(binding = 1) uniform texture2D samplerNormal_0; @@ -24,8 +18,6 @@ struct Light_0 vec4 color_0; }; - -#line 14 struct Uniforms_0 { Light_0 light_0; @@ -34,8 +26,6 @@ struct Uniforms_0 mat4x4 model_0; }; - -#line 21 layout(binding = 3) layout(std140) uniform _S1 { @@ -44,45 +34,31 @@ layout(std140) uniform _S1 mat4x4 view_0; mat4x4 model_0; }ubo_0; - -#line 26 layout(binding = 5) uniform accelerationStructureEXT as_0; - -#line 24 layout(rgba32f) layout(binding = 4) uniform image2D outputImage_0; - -#line 33 struct ReflectionRay_0 { float color_1; }; - -#line 5218 "hlsl.meta.slang" layout(location = 0) rayPayloadEXT ReflectionRay_0 p_0; - -#line 28 "tests/nv-extensions/nv-ray-tracing-motion-blur.slang" struct ShadowRay_0 { float hitDistance_0; }; - -#line 5286 "hlsl.meta.slang" layout(location = 1) rayPayloadEXT ShadowRay_0 p_1; - -#line 5079 struct RayDesc_0 { vec3 Origin_0; @@ -91,115 +67,60 @@ struct RayDesc_0 float TMax_0; }; - -#line 5243 void TraceMotionRay_0(accelerationStructureEXT AccelerationStructure_0, uint RayFlags_0, uint InstanceInclusionMask_0, uint RayContributionToHitGroupIndex_0, uint MultiplierForGeometryContributionToHitGroupIndex_0, uint MissShaderIndex_0, RayDesc_0 Ray_0, float CurrentTime_0, inout ShadowRay_0 Payload_0) { - -#line 5288 p_1 = Payload_0; traceRayMotionNV(AccelerationStructure_0, RayFlags_0, InstanceInclusionMask_0, RayContributionToHitGroupIndex_0, MultiplierForGeometryContributionToHitGroupIndex_0, MissShaderIndex_0, Ray_0.Origin_0, Ray_0.TMin_0, Ray_0.Direction_0, Ray_0.TMax_0, CurrentTime_0, (1)); - -#line 5302 Payload_0 = p_1; return; } - -#line 3527 float saturate_0(float x_0) { return clamp(x_0, 0.0, 1.0); } - -#line 5168 void TraceRay_0(accelerationStructureEXT AccelerationStructure_1, uint RayFlags_1, uint InstanceInclusionMask_1, uint 
RayContributionToHitGroupIndex_1, uint MultiplierForGeometryContributionToHitGroupIndex_1, uint MissShaderIndex_1, RayDesc_0 Ray_1, inout ReflectionRay_0 Payload_1) { - -#line 5220 p_0 = Payload_1; traceRayEXT(AccelerationStructure_1, RayFlags_1, InstanceInclusionMask_1, RayContributionToHitGroupIndex_1, MultiplierForGeometryContributionToHitGroupIndex_1, MissShaderIndex_1, Ray_1.Origin_0, Ray_1.TMin_0, Ray_1.Direction_0, Ray_1.TMax_0, (0)); - -#line 5233 Payload_1 = p_0; return; } - -#line 38 "tests/nv-extensions/nv-ray-tracing-motion-blur.slang" void main() { uvec3 _S2 = ((gl_LaunchIDEXT)); - -#line 40 ivec2 launchID_0 = ivec2(_S2.xy); uvec3 _S3 = ((gl_LaunchSizeEXT)); - -#line 41 ivec2 launchSize_0 = ivec2(_S3.xy); - vec2 inUV_0 = vec2((float(launchID_0.x) + 0.5) / float(launchSize_0.x), (float(launchID_0.y) + 0.5) / float(launchSize_0.y)); - -#line 48 - vec4 _S4 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))); - -#line 48 - vec3 P_0 = _S4.xyz; - vec4 _S5 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))); - -#line 49 - vec3 N_0 = _S5.xyz * 2.0 - 1.0; - - + vec3 P_0 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))).xyz; + vec3 N_0 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))).xyz * 2.0 - 1.0; vec3 lightDelta_0 = ubo_0.light_0.position_0.xyz - P_0; float lightDist_0 = length(lightDelta_0); vec3 L_0 = normalize(lightDelta_0); - float _S6 = 1.0 / (lightDist_0 * lightDist_0); - + float _S4 = 1.0 / (lightDist_0 * lightDist_0); RayDesc_0 ray_0; ray_0.Origin_0 = P_0; ray_0.TMin_0 = 0.00000099999999747524; ray_0.Direction_0 = lightDelta_0; ray_0.TMax_0 = lightDist_0; - - ShadowRay_0 shadowRay_0; shadowRay_0.hitDistance_0 = 0.0; - - - TraceMotionRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, 1.0, shadowRay_0); - -#line 69 float atten_0; - -#line 87 if(shadowRay_0.hitDistance_0 < lightDist_0) { - -#line 87 atten_0 = 0.0; - -#line 87 } else { - -#line 87 - atten_0 = _S6; - -#line 87 + atten_0 = _S4; } - -#line 93 vec3 color_2 = 
ubo_0.light_0.color_0.xyz * saturate_0(dot(N_0, L_0)) * atten_0; - - ReflectionRay_0 reflectionRay_0; TraceRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0); - -#line 117 imageStore((outputImage_0), ivec2((uvec2(launchID_0))), vec4(color_2 + reflectionRay_0.color_1, 1.0)); return; } diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl index 9d3584e1f..8255599b9 100644 --- a/tests/vkray/anyhit.slang.glsl +++ b/tests/vkray/anyhit.slang.glsl @@ -1,4 +1,3 @@ -// anyhit.slang.glsl #version 460 #extension GL_EXT_ray_tracing : require layout(row_major) uniform; @@ -12,8 +11,7 @@ layout(binding = 0) layout(std140) uniform _S1 { int mode_0; -} gParams_0; - +}gParams_0; layout(binding = 1) uniform texture2D gParams_alphaMap_0; @@ -24,24 +22,21 @@ struct SphereHitAttributes_0 { vec3 normal_0; }; + hitAttributeEXT SphereHitAttributes_0 _S2; struct ShadowRay_0 { vec4 hitDistance_0; }; + rayPayloadInEXT ShadowRay_0 _S3; void main() { if(gParams_0.mode_0 != 0) { - float val_0 = textureLod( - sampler2D(gParams_alphaMap_0, gParams_sampler_0), - _S2.normal_0.xy, - (0.0)).x; - - if(val_0 > 0.0) + if((textureLod(sampler2D(gParams_alphaMap_0,gParams_sampler_0), (_S2.normal_0.xy), (0.0)).x) > 0.0) { terminateRayEXT;; } @@ -50,7 +45,6 @@ void main() ignoreIntersectionEXT;; } } - return; } diff --git a/tests/vkray/callable.slang.glsl b/tests/vkray/callable.slang.glsl index dd99cc440..871ffcbb7 100644 --- a/tests/vkray/callable.slang.glsl +++ b/tests/vkray/callable.slang.glsl @@ -1,18 +1,12 @@ #version 460 - -#if USE_NV_RT -#extension GL_NV_ray_tracing : require -#define callableDataInEXT callableDataInNV -#define hitAttributeEXT hitAttributeNV -#define ignoreIntersectionEXT ignoreIntersectionNV -#define rayPayloadInEXT rayPayloadInNV -#define terminateRayEXT terminateRayNV -#else #extension GL_EXT_ray_tracing : require -#endif +layout(row_major) uniform; +layout(row_major) buffer; +layout(binding = 0) +uniform texture2D gAlbedoMap_0; -layout(binding = 0) 
uniform texture2D gAlbedoMap_0; -layout(binding = 1) uniform sampler gSampler_0; +layout(binding = 1) +uniform sampler gSampler_0; struct MaterialPayload_0 { @@ -24,12 +18,7 @@ callableDataInEXT MaterialPayload_0 _S1; void main() { - vec4 _S2 = textureLod( - sampler2D(gAlbedoMap_0,gSampler_0), - _S1.uv_0, - float(0)); - - _S1.albedo_0 = _S2; - + _S1.albedo_0 = (textureLod(sampler2D(gAlbedoMap_0,gSampler_0), (_S1.uv_0), (0.0))); return; } + diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl index 69dc74c53..80a63d5ad 100644 --- a/tests/vkray/raygen.slang.glsl +++ b/tests/vkray/raygen.slang.glsl @@ -10,6 +10,7 @@ uniform sampler sampler_0; layout(binding = 1) uniform texture2D samplerNormal_0; + struct Light_0 { vec4 position_0; @@ -95,23 +96,18 @@ void main() uvec3 _S6 = ((gl_LaunchIDEXT)); float _S7 = float(_S6.y) + 0.5; uvec3 _S8 = ((gl_LaunchSizeEXT)); - vec2 inUV_0 = vec2(_S5, _S7 / float(_S8.y)); - vec4 _S9 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))); - vec3 P_0 = _S9.xyz; - vec4 _S10 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))); - vec3 N_0 = _S10.xyz * 2.0 - 1.0; - + vec3 P_0 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))).xyz; + vec3 N_0 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))).xyz * 2.0 - 1.0; vec3 lightDelta_0 = ubo_0.light_0.position_0.xyz - P_0; float lightDist_0 = length(lightDelta_0); vec3 L_0 = normalize(lightDelta_0); - float _S11 = 1.0 / (lightDist_0 * lightDist_0); + float _S9 = 1.0 / (lightDist_0 * lightDist_0); RayDesc_0 ray_0; ray_0.Origin_0 = P_0; ray_0.TMin_0 = 0.00000099999999747524; ray_0.Direction_0 = lightDelta_0; ray_0.TMax_0 = lightDist_0; - ShadowRay_0 shadowRay_0; shadowRay_0.hitDistance_0 = 0.0; TraceRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, shadowRay_0); @@ -122,14 +118,14 @@ void main() } else { - atten_0 = _S11; + atten_0 = _S9; } vec3 color_2 = ubo_0.light_0.color_0.xyz * saturate_0(dot(N_0, L_0)) * atten_0; - ReflectionRay_0 reflectionRay_0; 
TraceRay_1(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0); vec3 color_3 = color_2 + reflectionRay_0.color_1; - uvec3 _S12 = ((gl_LaunchIDEXT)); - imageStore((outputImage_0), ivec2((uvec2(ivec2(_S12.xy)))), vec4(color_3, 1.0)); + uvec3 _S10 = ((gl_LaunchIDEXT)); + imageStore((outputImage_0), ivec2((uvec2(ivec2(_S10.xy)))), vec4(color_3, 1.0)); return; } + |
