CUDA/CPU resource coverage (#1224)

* Add cubemap support. * Add CUDA fence intrinsics. * Added Gather for CUDA. * Use the CUDA driver API as much as possible. * Support 1D texture on CPU * WIP on 1D texture on CUDA * Added simplified texture test * Fix test. * Improve texture-simple tests. * Add CPU support for 3d textures * Add support for mip maps to CUDA * Disable warnings in nvrtc * Update CUDA docs * WIP on 3d texture support. * Add support for 3d textures for CPU and CUDA.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-02-18 12:40:14 -0500
committer: GitHub <noreply@github.com> 2020-02-18 12:40:14 -0500
commit: e109985375712b449d365450b3d3e39416a171ce (patch)
tree: 56a2c805368d5afbfa568e514af0704b8ed7346c
parent: 2c097545eaa324a91a035327abad2e8b4fa60469 (diff)
7 files changed, 251 insertions, 53 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md
index 9c82b1dc9..79251251b 100644
--- a/docs/cuda-target.md
+++ b/docs/cuda-target.md
@@ -20,8 +20,7 @@ These limitations apply to Slang transpiling to CUDA.
 
 The following are a work in progress or not implmented but are planned to be so in the future
 
-* Barriers/Atomics/Complex resource types
-* Preliminary version does maps StructuredBuffers to a pointer - and without boudn checking
+* Resource types including surfaces
 
 # How it works
 
@@ -137,9 +136,8 @@ For a client application - as long as the requirements of the generated code are
 
 That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. 
 */
-virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude(
-SlangPassThrough passThrough,
-const char* preludeText) = 0;
+
+void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText);
 ```
 
 The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
@@ -152,5 +150,3 @@ Language aspects
 Slang follows the HLSL convention that arrays are passed by value. This is in contrast with CUDA where arrays follow C++ conventions and are passed by reference. To make generated CUDA follow this convention an array is turned into a 'FixedArray' struct type. 
 
 To get something more similar to CUDA/C++ operation the array can be marked in out or inout to make it passed by reference. 
-
-
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 2238727c5..936233afc 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -262,6 +262,23 @@ struct Texture2D
     ITexture2D* texture;              
 };
 
+struct ITexture3D
+{
+    virtual void Load(const int4& v, void* out) = 0;
+    virtual void Sample(SamplerState samplerState, const float3& loc, void* out) = 0;
+    virtual void SampleLevel(SamplerState samplerState, const float3& loc, float level, void* out) = 0;
+};
+
+template <typename T>
+struct Texture3D
+{
+    T Load(const int4& v) const { T out; texture->Load(v, &out); return out; }
+    T Sample(SamplerState samplerState, const float3& v) const { T out; texture->Sample(samplerState, v, &out); return out; }
+    T SampleLevel(SamplerState samplerState, const float3& v, float level) { T out; texture->SampleLevel(samplerState, v, level, &out); return out; }
+    
+    ITexture3D* texture;              
+};
+
 /* Varying input for Compute */
 
 /* Used when running a single thread */
diff --git a/source/core/slang-nvrtc-compiler.cpp b/source/core/slang-nvrtc-compiler.cpp
index 6464592a5..f68c4dc01 100644
--- a/source/core/slang-nvrtc-compiler.cpp
+++ b/source/core/slang-nvrtc-compiler.cpp
@@ -297,8 +297,16 @@ SlangResult NVRTCDownstreamCompiler::compile(const CompileOptions& options, RefP
         cmdLine.addArg(include);
     }
 
+    // Neither of these options is strictly required for general use of nvrtc,
+    // but they are enabled to make use within Slang work more smoothly
     {
+        // Require C++14, as it makes initialization construction with {} available and so simplifies code generation
         cmdLine.addArg("-std=c++14");
+
+        // Disable all warnings
+        // This is arguably too much - but nvrtc does not appear to have a mechanism to switch off individual warnings.
+        // I tried the -Xcudafe mechanism but that does not appear to work for nvrtc
+        cmdLine.addArg("-w");
     }
 
     nvrtcProgram program = nullptr;
diff --git a/tests/compute/texture-simple.slang b/tests/compute/texture-simple.slang
index 040af2784..e79a26885 100644
--- a/tests/compute/texture-simple.slang
+++ b/tests/compute/texture-simple.slang
@@ -2,7 +2,7 @@
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil
-// TODO(JS): Doesn't work on vk currently
+// TODO(JS): Doesn't work on vk currently, because createTextureView is not implemented on the vk renderer
 //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute 
 
@@ -10,6 +10,8 @@
 Texture1D<float> t1D;
 //TEST_INPUT: Texture2D(size=4, content = one):name t2D
 Texture2D<float> t2D;
+//TEST_INPUT: Texture3D(size=4, content = one):name t3D
+Texture3D<float> t3D;
 
 //TEST_INPUT: Sampler:name samplerState
 SamplerState samplerState;
@@ -26,6 +28,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     float val = 0.0f;
     val += t1D.SampleLevel(samplerState, u, 0);
     val += t2D.SampleLevel(samplerState, float2(u, u), 0);
+    val += t3D.SampleLevel(samplerState, float3(u, u, u), 0);
  
     outputBuffer[idx] = val;
 }
diff --git a/tests/compute/texture-simple.slang.expected.txt b/tests/compute/texture-simple.slang.expected.txt
index f5cf6fb10..e54af3bc8 100644
--- a/tests/compute/texture-simple.slang.expected.txt
+++ b/tests/compute/texture-simple.slang.expected.txt
@@ -1,4 +1,4 @@
-40000000
-40000000
-40000000
-40000000
+40400000
+40400000
+40400000
+40400000
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index d0907482c..d69521e66 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -16,6 +16,40 @@ namespace renderer_test {
 using namespace Slang;
 
 template <int COUNT>
+struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D
+{
+    void set(void* out)
+    {
+        float* dst = (float*)out;
+        for (int i = 0; i < COUNT; ++i)
+        {
+            dst[i] = m_value;
+        }
+    }
+
+    virtual void Load(const CPPPrelude::int4& v, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+    virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+    virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+
+    ValueTexture3D(float value) :
+        m_value(value)
+    {
+        m_interface = static_cast<CPPPrelude::ITexture3D*>(this);
+    }
+
+    float m_value;
+};
+
+template <int COUNT>
 struct ValueTexture2D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture2D
 {
     void set(void* out)
@@ -83,6 +117,8 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex
     float m_value;
 };
 
+
+
 static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value)
 {
     switch (baseShape)
@@ -110,12 +146,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
                 default: break;
             }
         }
+        case SLANG_TEXTURE_3D:
+        {
+            switch (elemCount)
+            {
+                case 1: return new ValueTexture3D<1>(value);
+                case 2: return new ValueTexture3D<2>(value);
+                case 3: return new ValueTexture3D<3>(value);
+                case 4: return new ValueTexture3D<4>(value);
+                default: break;
+            }
+        }
         default: break;
     }
     return nullptr;
 }
 
-
 /* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext)
 {
     auto request = compilationAndLayout.output.request;
@@ -172,6 +218,8 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
                         {
                             case SLANG_TEXTURE_1D:
                             case SLANG_TEXTURE_2D:
+                            case SLANG_TEXTURE_3D:
+                            case SLANG_TEXTURE_CUBE:
                             {
                                 SLANG_ASSERT(value->m_userIndex >= 0);
                                 auto& srcEntry = layout.entries[value->m_userIndex];
@@ -203,11 +251,15 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
                                     }
                                     default: break;
                                 }
+
+                                if (value->m_target == nullptr)
+                                {
+                                    SLANG_ASSERT(!"Couldn't construct resource type");
+                                    return SLANG_FAIL;
+                                }
+
                                 break;
                             }
-                            
-                            case SLANG_TEXTURE_3D:
-                            case SLANG_TEXTURE_CUBE:
                             case SLANG_TEXTURE_BUFFER:
                             {
                                 // Need a CPU impl for these...
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index f471c2961..59b5e65f6 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -71,6 +71,10 @@ public:
         {
             SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray));
         }
+        if (m_cudaMipMappedArray)
+        {
+            SLANG_CUDA_ASSERT_ON_FAIL(cuMipmappedArrayDestroy(m_cudaMipMappedArray));
+        }
     }
 
     static CUDATextureResource* getCUDATextureResource(BindSet::Value* value)
@@ -88,6 +92,7 @@ public:
     // This is an opaque type, that's backed by a long long
     CUtexObject m_cudaTexObj = CUtexObject();
     CUarray m_cudaArray = CUarray();
+    CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray();
 };
 
 class ScopeCUDAModule
@@ -405,6 +410,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
 
                         auto access = type->getResourceAccess();
 
+                        CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY;
+
                         auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK;
 
                         switch (baseShape)
@@ -412,6 +419,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
                             case SLANG_TEXTURE_1D:
                             case SLANG_TEXTURE_2D:
                             case SLANG_TEXTURE_3D:
+                            case SLANG_TEXTURE_CUBE:
                             {
                                 SLANG_ASSERT(value->m_userIndex >= 0);
                                 auto& srcEntry = entries[value->m_userIndex];
@@ -426,9 +434,11 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
 
                                 const auto& textureDesc = srcEntry.textureDesc;
 
+                                // CUDA wants the unused dimensions to be 0.
+                                // Might need to specially handle elsewhere
                                 int width = textureDesc.size;
-                                int height = 1;
-                                int depth = 1;
+                                int height = 0;
+                                int depth = 0;
 
                                 switch (baseShape)
                                 {
@@ -444,35 +454,40 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
                                         depth = textureDesc.size;
                                         break;
                                     }
+                                    case SLANG_TEXTURE_CUBE:
+                                    {
+                                        height = width;
+                                        depth = 6;
+                                        break;
+                                    }
                                 }
                                 
                                 TextureData texData;
                                 generateTextureData(texData, textureDesc);
 
+                                auto mipLevels = texData.mipLevels;
+
                                 RefPtr<CUDATextureResource> tex = new CUDATextureResource;
 
                                 size_t elementSize = 0;
 
                                 {
-                                    CUDA_ARRAY_DESCRIPTOR arrayDesc;
-                                    arrayDesc.Width = width;
-
-                                    // Width, and Height are the width, and height of the CUDA array (in elements); the CUDA array is one-dimensional if height is 0, two-dimensional otherwise;
-                                    arrayDesc.Height = (baseShape == SLANG_TEXTURE_1D) ? 0 : height;
+                                    CUarray_format format = CU_AD_FORMAT_FLOAT;
+                                    int numChannels = 0;
 
                                     switch (textureDesc.format)
                                     {
                                         case Format::R_Float32:
                                         {
-                                            arrayDesc.Format = CU_AD_FORMAT_FLOAT;
-                                            arrayDesc.NumChannels = 1;
+                                            format = CU_AD_FORMAT_FLOAT;
+                                            numChannels = 1;
                                             elementSize = sizeof(float);
                                             break;
                                         }
                                         case Format::RGBA_Unorm_UInt8:
                                         {
-                                            arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
-                                            arrayDesc.NumChannels = 4;
+                                            format = CU_AD_FORMAT_UNSIGNED_INT8;
+                                            numChannels = 4;
                                             elementSize = sizeof(uint32_t);
                                             break;
                                         }
@@ -483,35 +498,135 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
                                         }
                                     }
 
-                                    // Allocate the array
-                                    SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc));
+                                    if (mipLevels > 1)
+                                    {
+                                        resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
+
+                                        CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
+                                        memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+                                        arrayDesc.Width = width;
+                                        arrayDesc.Height = height;
+                                        arrayDesc.Depth = depth;
+                                        arrayDesc.Format = format;
+                                        arrayDesc.NumChannels = numChannels;
+                                        arrayDesc.Flags = 0;
+
+                                        if (baseShape == SLANG_TEXTURE_CUBE)
+                                        {
+                                            arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP;
+                                        }
+
+                                        SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray,  &arrayDesc, mipLevels));
+                                    }
+                                    else
+                                    {
+                                        resourceType = CU_RESOURCE_TYPE_ARRAY;
+
+                                        if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE)
+                                        {
+                                            CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
+                                            memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+                                            arrayDesc.Depth = depth;
+                                            arrayDesc.Height = height;
+                                            arrayDesc.Width = width;
+                                            arrayDesc.Format = format;
+                                            arrayDesc.NumChannels = numChannels;
+                    
+                                            arrayDesc.Flags = 0;
+
+                                            SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc));
+                                        }
+                                        else
+                                        {
+                                            CUDA_ARRAY_DESCRIPTOR arrayDesc;
+                                            memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+                                            arrayDesc.Width = width;
+                                            arrayDesc.Height = height;
+                                            arrayDesc.Format = format;
+                                            arrayDesc.NumChannels = numChannels;
+
+                                            // Allocate the array, will work for 1D or 2D case
+                                            SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc));
+                                        }
+                                    }
                                 }
 
-                                switch (baseShape)
+                                for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel)
                                 {
-                                    case SLANG_TEXTURE_1D:
-                                    case SLANG_TEXTURE_2D:
+                                    int mipWidth = width >> mipLevel;
+                                    int mipHeight = height >> mipLevel;
+                                    int mipDepth = depth >> mipLevel;
+
+                                    mipWidth = (mipWidth == 0) ? 1 : mipWidth;
+                                    mipHeight = (mipHeight == 0) ? 1 : mipHeight;
+                                    mipDepth = (mipDepth == 0) ? 1 : mipDepth;
+
+                                    auto dstArray = tex->m_cudaArray;
+                                    if (tex->m_cudaMipMappedArray)
                                     {
-                                        // TODO(JS):
-                                        // Not clear how the copy should be done for 1D, but seeing as it is copying to an 'array'
-                                        // doing it with cuMemcpy2D is appropriate.
-                                        // Not clear if the height should be 0 or 1. The array required it to be 0.
-                                        CUDA_MEMCPY2D copyParam;
-                                        memset(&copyParam, 0, sizeof(copyParam));
-                                        copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
-                                        copyParam.dstArray = tex->m_cudaArray;
-                                        copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
-                                        copyParam.srcHost = texData.dataBuffer[0].getBuffer();
-                                        copyParam.srcPitch = width * elementSize;
-                                        copyParam.WidthInBytes = copyParam.srcPitch; 
-                                        copyParam.Height = height;
-                                        SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
-                                        break;
+                                        // Get the array for the mip level
+                                        SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel));
                                     }
-                                    case SLANG_TEXTURE_3D:
+                                    SLANG_ASSERT(dstArray);
+
+                                    const auto& srcData = texData.dataBuffer[mipLevel];
+
+                                    SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
+
+                                    // Check using the desc to see if it's plausible
                                     {
-                                        SLANG_ASSERT(!"Not implemented");
-                                        break;
+                                        CUDA_ARRAY_DESCRIPTOR arrayDesc;
+                                        SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray));
+
+                                        SLANG_ASSERT(mipWidth == arrayDesc.Width);
+                                        SLANG_ASSERT(mipHeight == arrayDesc.Height);
+                                    }
+
+                                    switch (baseShape)
+                                    {
+                                        case SLANG_TEXTURE_1D:
+                                        case SLANG_TEXTURE_2D:
+                                        {                                                                                                                                 
+                                            CUDA_MEMCPY2D copyParam;
+                                            memset(&copyParam, 0, sizeof(copyParam));
+                                            copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+                                            copyParam.dstArray = dstArray;
+                                            copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
+                                            copyParam.srcHost = srcData.getBuffer();
+                                            copyParam.srcPitch = mipWidth * elementSize;
+                                            copyParam.WidthInBytes = copyParam.srcPitch; 
+                                            copyParam.Height = mipHeight; 
+                                            SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
+                                            break;
+                                        }
+                                        case SLANG_TEXTURE_3D:
+                                        case SLANG_TEXTURE_CUBE:
+                                        {                                           
+                                            CUDA_MEMCPY3D copyParam;
+                                            memset(&copyParam, 0, sizeof(copyParam));
+
+                                            copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+                                            copyParam.dstArray = dstArray;
+
+                                            copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
+                                            copyParam.srcHost = srcData.getBuffer();
+                                            copyParam.srcPitch = mipWidth * elementSize;
+                                            copyParam.WidthInBytes = copyParam.srcPitch;
+                                            copyParam.Height = mipHeight;
+                                            copyParam.Depth = mipDepth;
+
+                                            SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(&copyParam));
+                                            break;
+                                        }
+
+                                        default:
+                                        {
+                                            SLANG_ASSERT(!"Not implemented");
+                                            break;
+                                        }
                                     }
                                 }
 
@@ -520,8 +635,16 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
                                 {
                                     CUDA_RESOURCE_DESC resDesc;
                                     memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC));
-                                    resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
-                                    resDesc.res.array.hArray = tex->m_cudaArray;
+                                    resDesc.resType = resourceType;
+
+                                    if (tex->m_cudaArray)
+                                    {
+                                        resDesc.res.array.hArray = tex->m_cudaArray;
+                                    }
+                                    if (tex->m_cudaMipMappedArray)
+                                    {
+                                        resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray;
+                                    }
 
                                     CUDA_TEXTURE_DESC texDesc;
                                     memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC));
@@ -538,7 +661,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
                                 break;
                             }
 
-                            case SLANG_TEXTURE_CUBE:
                             case SLANG_TEXTURE_BUFFER:
                             {
                                 // Need a CUDA impl for these...
author	jsmall-nvidia <jsmall@nvidia.com>	2020-02-18 12:40:14 -0500
committer	GitHub <noreply@github.com>	2020-02-18 12:40:14 -0500
commit	e109985375712b449d365450b3d3e39416a171ce (patch)
tree	56a2c805368d5afbfa568e514af0704b8ed7346c
parent	2c097545eaa324a91a035327abad2e8b4fa60469 (diff)