Support for RWTexture types on CPU and CUDA (#1243)

* Added FloatTextureData as a mechanism to enable CPU based Texture writes. * Add [] RWTexture access for CPU. * Fixed rw-texture-simple.slang.expected.txt * WIP: CUDA stdlib has support for [] surface access. * Made IRWTexture class able to take different locations. Doing a Texture2D access on CUDA works. * Fix bug in outputting UniformState - was missing out padding. Support RWTexture with array. Support RWTexture3D. * Use * for locations for read only textures, so only need an ITexture interface. * Fix problem around application of set/get for CUDA on subscript Texture types.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-02-26 21:13:41 +0000
committer: GitHub <noreply@github.com> 2020-02-26 21:13:41 +0000
commit: 7bce066cfc51296a538c7a7d325133d60e352494 (patch)
tree: ce54b40315274c3a7daaf5781de3d2504d72cba2 /source
parent: 6308a1224672944220a1fee34ae22f70212703a0 (diff)
4 files changed, 220 insertions, 47 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 722629034..70bc90392 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -903,37 +903,125 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 // subscript operator
                 sb << "__subscript(" << uintN << " location) -> T {\n";
 
+                // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!!
+
                 // GLSL/SPIR-V distinguished sampled vs. non-sampled images
-                switch( access )
                 {
-                case SLANG_RESOURCE_ACCESS_NONE:
-                case SLANG_RESOURCE_ACCESS_READ:
-                    sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                    sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)";
-                    if( !isMultisample )
+                    switch( access )
                     {
-                        sb << ", 0";
+                    case SLANG_RESOURCE_ACCESS_NONE:
+                    case SLANG_RESOURCE_ACCESS_READ:
+                        sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
+                        sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)";
+                        if( !isMultisample )
+                        {
+                            sb << ", 0";
+                        }
+                        else
+                        {
+                            // TODO: how to handle passing through sample index?
+                            sb << ", 0";
+                        }
+                        break;
+
+                    default:
+                        sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)";
+                        if( isMultisample )
+                        {
+                            // TODO: how to handle passing through sample index?
+                            sb << ", 0";
+                        }
+                        break;
                     }
-                    else
+                    sb << ")$z\")\n";
+                }
+
+                // CUDA
+                {
+                    if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
                     {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
-                    }
-                    break;
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
 
-                default:
-                    sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)";
-                    if( isMultisample )
+                        sb << "__target_intrinsic(cuda, \"surf";
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << coordCount << "D";
+                        }
+                        else
+                        {
+                            sb << "Cubemap";
+                        }
+
+                        sb << (isArray ? "Layered" : "");
+                        sb << "read<$T0>($0";
+                            
+                        for (int i = 0; i < vecCount; ++i)
+                        {
+                            sb << ", ($1)";
+                            if (vecCount > 1)
+                            {
+                                sb << '.' << char(i + 'x');
+                            }
+                        }
+
+                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                    }
+                    else if (access == SLANG_RESOURCE_ACCESS_READ)
                     {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
+                        // We can allow this on Texture1D
+                        if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+                        {
+                            sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, $1)\")\n";
+                        }
                     }
-                    break;
                 }
 
+                // Output that has get
+                sb << " get;\n";
+
+                // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!!
+
+                if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ))
+                {
+                    // GLSL
+                    sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\")\n";
+
+                    // CUDA
+                    {
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
+
+                        sb << "__target_intrinsic(cuda, \"surf";
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << coordCount << "D";
+                        }
+                        else
+                        {
+                            sb << "Cubemap";
+                        }
 
-                sb << ")$z\") get;\n";
+                        sb << (isArray ? "Layered" : "");
+                        sb << "write<$T0>($2, $0";
+                        for (int i = 0; i < vecCount; ++i)
+                        {
+                            sb << ", ($1)";
+                            if (vecCount > 1)
+                            {
+                                sb << '.' << char(i + 'x');
+                            }
+                        }
 
+                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                    }
+
+                    // Set
+                    sb << " set;\n";
+                }
+
+                // !!!!!!!!!!!!!!!!!! ref !!!!!!!!!!!!!!!!!!!!!!!!!
+                
                 // Depending on the access level of the texture type,
                 // we either have just a getter (the default), or both
                 // a getter and setter.
@@ -942,10 +1030,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 case SLANG_RESOURCE_ACCESS_NONE:
                 case SLANG_RESOURCE_ACCESS_READ:
                     break;
-
                 default:
-                    sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\") set;\n";
-
                     sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n";
                     break;
                 }
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index ba960b1d1..4c8da2a9a 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -924,37 +924,125 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 // subscript operator
                 sb << "__subscript(" << uintN << " location) -> T {\n";
 
+                // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!!
+
                 // GLSL/SPIR-V distinguished sampled vs. non-sampled images
-                switch( access )
                 {
-                case SLANG_RESOURCE_ACCESS_NONE:
-                case SLANG_RESOURCE_ACCESS_READ:
-                    sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                    sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)";
-                    if( !isMultisample )
+                    switch( access )
                     {
-                        sb << ", 0";
+                    case SLANG_RESOURCE_ACCESS_NONE:
+                    case SLANG_RESOURCE_ACCESS_READ:
+                        sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
+                        sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)";
+                        if( !isMultisample )
+                        {
+                            sb << ", 0";
+                        }
+                        else
+                        {
+                            // TODO: how to handle passing through sample index?
+                            sb << ", 0";
+                        }
+                        break;
+
+                    default:
+                        sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)";
+                        if( isMultisample )
+                        {
+                            // TODO: how to handle passing through sample index?
+                            sb << ", 0";
+                        }
+                        break;
                     }
-                    else
+                    sb << ")$z\")\n";
+                }
+
+                // CUDA
+                {
+                    if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
                     {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
-                    }
-                    break;
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
 
-                default:
-                    sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)";
-                    if( isMultisample )
+                        sb << "__target_intrinsic(cuda, \"surf";
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << coordCount << "D";
+                        }
+                        else
+                        {
+                            sb << "Cubemap";
+                        }
+
+                        sb << (isArray ? "Layered" : "");
+                        sb << "read<$T0>($0";
+                            
+                        for (int i = 0; i < vecCount; ++i)
+                        {
+                            sb << ", ($1)";
+                            if (vecCount > 1)
+                            {
+                                sb << '.' << char(i + 'x');
+                            }
+                        }
+
+                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                    }
+                    else if (access == SLANG_RESOURCE_ACCESS_READ)
                     {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
+                        // We can allow this on Texture1D
+                        if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+                        {
+                            sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, $1)\")\n";
+                        }
                     }
-                    break;
                 }
 
+                // Output that has get
+                sb << " get;\n";
+
+                // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!!
+
+                if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ))
+                {
+                    // GLSL
+                    sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\")\n";
+
+                    // CUDA
+                    {
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
+
+                        sb << "__target_intrinsic(cuda, \"surf";
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << coordCount << "D";
+                        }
+                        else
+                        {
+                            sb << "Cubemap";
+                        }
 
-                sb << ")$z\") get;\n";
+                        sb << (isArray ? "Layered" : "");
+                        sb << "write<$T0>($2, $0";
+                        for (int i = 0; i < vecCount; ++i)
+                        {
+                            sb << ", ($1)";
+                            if (vecCount > 1)
+                            {
+                                sb << '.' << char(i + 'x');
+                            }
+                        }
 
+                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                    }
+
+                    // Set
+                    sb << " set;\n";
+                }
+
+                // !!!!!!!!!!!!!!!!!! ref !!!!!!!!!!!!!!!!!!!!!!!!!
+                
                 // Depending on the access level of the texture type,
                 // we either have just a getter (the default), or both
                 // a getter and setter.
@@ -963,10 +1051,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 case SLANG_RESOURCE_ACCESS_NONE:
                 case SLANG_RESOURCE_ACCESS_READ:
                     break;
-
                 default:
-                    sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\") set;\n";
-
                     sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n";
                     break;
                 }
@@ -1424,7 +1509,7 @@ for (auto op : binaryOps)
         sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
     }
 }
-SLANG_RAW("#line 1406 \"core.meta.slang\"")
+SLANG_RAW("#line 1491 \"core.meta.slang\"")
 SLANG_RAW("\n")
 SLANG_RAW("\n")
 SLANG_RAW("// Specialized function\n")
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index f3d16444a..945c070d1 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -2615,6 +2615,7 @@ void CPPSourceEmitter::_emitUniformStateMembers(const List<EmitAction>& actions,
             // We want to output some padding
             StringBuilder builder;
             builder << "uint8_t _pad" << (padIndex++) << "[" << (paramInfo.offset - offset) << "];\n";
+            m_writer->emit(builder);
         }
 
         emitGlobalInst(paramInfo.inst);
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 7e9279964..bd0e6e8e0 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -855,6 +855,10 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
                 // It's a pointer to the actual uniform data
                 return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
 
+            case ShaderParameterKind::TextureSampler:
+            case ShaderParameterKind::MutableTextureSampler:
+                // Note that there is no distinct Sampler on CUDA, so a TextureSampler is the same as a Texture,
+                // which is an ObjectHandle.
             case ShaderParameterKind::MutableTexture:
             case ShaderParameterKind::TextureUniformBuffer:
             case ShaderParameterKind::Texture:
@@ -882,8 +886,6 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
                 // would presumably have to remove use of variables of this kind throughout IR. 
                 return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
 
-            case ShaderParameterKind::TextureSampler:
-            case ShaderParameterKind::MutableTextureSampler:
             case ShaderParameterKind::InputRenderTarget:
                 // TODO: how to handle these?
             default:
author	jsmall-nvidia <jsmall@nvidia.com>	2020-02-26 21:13:41 +0000
committer	GitHub <noreply@github.com>	2020-02-26 21:13:41 +0000
commit	7bce066cfc51296a538c7a7d325133d60e352494 (patch)
tree	ce54b40315274c3a7daaf5781de3d2504d72cba2 /source
parent	6308a1224672944220a1fee34ae22f70212703a0 (diff)