4 files changed, 133 insertions, 3 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index ec1a3ed0b..722629034 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -777,6 +777,67 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     sb << ")$z\")\n";
 
                 }
+
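+                // CUDA: intrinsic for the Load below - surface reads for read-write resources, tex1Dfetch for read-only non-array 1D. NOTE(review): CUDA surf*read takes x as a byte offset - confirm whether ($1).x needs scaling by the element size.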
+                if (isMultisample)
+                {
+                }
+                else
+                {
+                    if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
+                    {
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
+
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D";
+                            if (isArray)
+                            {
+                                sb << "Layered";
+                            }
+                            sb << "read";
+                            sb << "<$T0>($0";
+                            for (int i = 0; i < coordCount; ++i)
+                            {
+                                sb << ", ($1)";
+                                if (vecCount > 1)
+                                {
+                                    sb << '.' << char(i + 'x');
+                                }
+                            }
+                            if (isArray)
+                            {
+                                sb << ", int(($1)." << char(coordCount + 'x') << ")";
+                            }
+                            sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                        }
+                        else
+                        {
+                            sb << "__target_intrinsic(cuda, \"surfCubemap";
+                            if (isArray)
+                            {
+                                sb << "Layered";
+                            }
+                            sb << "read";
+                            sb << "<$T0>($0, ($1).x, ($1).y, ($1).z"; 
+                            if (isArray)
+                            {
+                                sb << ", int(($1).w)";
+                            }
+                            sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                        }
+                    }
+                    else if (access == SLANG_RESOURCE_ACCESS_READ)
+                    {
+                        // Read-only access: only a non-array Texture1D is mapped for CUDA here (via tex1Dfetch)
+                        if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+                        {
+                            sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n";
+                        }
+                    }
+                }
+
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -785,6 +846,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 }
                 sb << ");\n";
 
+                // GLSL
                 if (isMultisample)
                 {
                     sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
@@ -804,6 +866,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     }
                     sb << ", $2)$z\")\n";
                 }
+
+
+
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index a8ad43965..ba960b1d1 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -798,6 +798,67 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     sb << ")$z\")\n";
 
                 }
+
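+                // CUDA: intrinsic for the Load below - surface reads for read-write resources, tex1Dfetch for read-only non-array 1D. NOTE(review): CUDA surf*read takes x as a byte offset - confirm whether ($1).x needs scaling by the element size.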
+                if (isMultisample)
+                {
+                }
+                else
+                {
+                    if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
+                    {
+                        const int coordCount = kBaseTextureTypes[tt].coordCount;
+                        const int vecCount = coordCount + int(isArray);
+
+                        if( baseShape != TextureFlavor::Shape::ShapeCube )
+                        {
+                            sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D";
+                            if (isArray)
+                            {
+                                sb << "Layered";
+                            }
+                            sb << "read";
+                            sb << "<$T0>($0";
+                            for (int i = 0; i < coordCount; ++i)
+                            {
+                                sb << ", ($1)";
+                                if (vecCount > 1)
+                                {
+                                    sb << '.' << char(i + 'x');
+                                }
+                            }
+                            if (isArray)
+                            {
+                                sb << ", int(($1)." << char(coordCount + 'x') << ")";
+                            }
+                            sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                        }
+                        else
+                        {
+                            sb << "__target_intrinsic(cuda, \"surfCubemap";
+                            if (isArray)
+                            {
+                                sb << "Layered";
+                            }
+                            sb << "read";
+                            sb << "<$T0>($0, ($1).x, ($1).y, ($1).z"; 
+                            if (isArray)
+                            {
+                                sb << ", int(($1).w)";
+                            }
+                            sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
+                        }
+                    }
+                    else if (access == SLANG_RESOURCE_ACCESS_READ)
+                    {
+                        // Read-only access: only a non-array Texture1D is mapped for CUDA here (via tex1Dfetch)
+                        if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+                        {
+                            sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n";
+                        }
+                    }
+                }
+
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -806,6 +867,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 }
                 sb << ");\n";
 
+                // GLSL
                 if (isMultisample)
                 {
                     sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
@@ -825,6 +887,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     }
                     sb << ", $2)$z\")\n";
                 }
+
+
+
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -1359,7 +1424,7 @@ for (auto op : binaryOps)
         sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
     }
 }
-SLANG_RAW("#line 1341 \"core.meta.slang\"")
+SLANG_RAW("#line 1406 \"core.meta.slang\"")
 SLANG_RAW("\n")
 SLANG_RAW("\n")
 SLANG_RAW("// Specialized function\n")
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 988c6f69c..c3339cbb5 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1433,7 +1433,7 @@ __generic<T : __BuiltinType, let N : int, let M : int> uint4 WaveMatch(matrix<T,
 
 // TODO(JS): For CUDA the article claims mask has to be used carefully
 // https://devblogs.nvidia.com/using-cuda-warp-level-primitives/
-// With the Warp intrinsics there is though mask, and it's just the 'active lanes'. So __activemask()
+// With the Warp intrinsics there is no mask, and it's just the 'active lanes'. So __activemask()
 // seems to be appropriate.
 
 __target_intrinsic(cuda, "(__all_sync(__activemask(), $0) != 0)") 
diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h
index 8614fd756..69349d9dc 100644
--- a/source/slang/hlsl.meta.slang.h
+++ b/source/slang/hlsl.meta.slang.h
@@ -1509,7 +1509,7 @@ SLANG_RAW("__generic<T : __BuiltinType, let N : int, let M : int> uint4 WaveMatc
 SLANG_RAW("\n")
 SLANG_RAW("// TODO(JS): For CUDA the article claims mask has to be used carefully\n")
 SLANG_RAW("// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/\n")
-SLANG_RAW("// With the Warp intrinsics there is though mask, and it's just the 'active lanes'. So __activemask()\n")
+SLANG_RAW("// With the Warp intrinsics there is no mask, and it's just the 'active lanes'. So __activemask()\n")
 SLANG_RAW("// seems to be appropriate.\n")
 SLANG_RAW("\n")
 SLANG_RAW("__target_intrinsic(cuda, \"(__all_sync(__activemask(), $0) != 0)\") \n")