summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-02-20 18:24:00 -0500
committerGitHub <noreply@github.com>2020-02-20 15:24:00 -0800
commit1f401d04e32c6feaeb35243ea5bfc2b14520344b (patch)
tree64394bc2f9fbef3dec3237c69604a0277d019f3c /tools
parentf9d99fde581c7dfdeb46e87f32da1fed8ac5441c (diff)
WIP on RWTexture types on CUDA/CPU (#1234)
* CUDA support for array of resources. * * Add support for Texture2DArray on CPU * Expand texture-simple.slang to test Texture2DArray * Reorganise CUDAComputeUtil to split out createTextureResource. * Add TextureCubeArray support for CPU/CUDA targets. * Pulled out CUDAResource Renamed derived classes to reflect that change. * Creation of SurfObject type. * Functions to return read/write access for simplifying future additions. * WIP for RWTexture access on CPU/CUDA. * CUsurfObject cannot have mips. * Ability to set number of mips on test data. Preliminary support for CUsurfObj and RWTexture1D on CUDA. CUDA docs improvements. * Fix typo.
Diffstat (limited to 'tools')
-rw-r--r--tools/render-test/cpu-compute-util.cpp66
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp139
-rw-r--r--tools/render-test/cuda/cuda-compute-util.h15
-rw-r--r--tools/render-test/shader-input-layout.cpp13
-rw-r--r--tools/render-test/shader-input-layout.h1
5 files changed, 173 insertions, 61 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 608da9461..3826ccec1 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -247,19 +247,61 @@ struct ValueTextureCubeArray : public CPUComputeUtil::Resource, public CPPPrelud
float m_value;
};
-static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value)
+
+template <int COUNT> // COUNT is the number of floats written by each Load
+struct ValueRWTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::IRWTexture1D
+{
+ void set(void* out) // Fills the first COUNT floats at `out` with m_value
+ {
+ float* dst = (float*)out;
+ for (int i = 0; i < COUNT; ++i)
+ {
+ dst[i] = m_value;
+ }
+ }
+
+ virtual void Load(int32_t loc, void* out) SLANG_OVERRIDE // `loc` is ignored: every location yields the same constant value
+ {
+ set(out);
+ }
+
+ ValueRWTexture1D(float value) :
+ m_value(value)
+ {
+ m_interface = static_cast<CPPPrelude::IRWTexture1D*>(this); // m_interface is not declared in this block; presumably a member of the Resource base - confirm
+ }
+
+ float m_value; // Constant written to every component by set()
+};
+
+
+static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, SlangResourceAccess access, Index elemCount, float value)
{
switch (shape)
{
case SLANG_TEXTURE_1D:
{
- switch (elemCount)
+ if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
{
- case 1: return new ValueTexture1D<1>(value);
- case 2: return new ValueTexture1D<2>(value);
- case 3: return new ValueTexture1D<3>(value);
- case 4: return new ValueTexture1D<4>(value);
- default: break;
+ switch (elemCount)
+ {
+ case 1: return new ValueRWTexture1D<1>(value);
+ case 2: return new ValueRWTexture1D<2>(value);
+ case 3: return new ValueRWTexture1D<3>(value);
+ case 4: return new ValueRWTexture1D<4>(value);
+ default: break;
+ }
+ }
+ else
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTexture1D<1>(value);
+ case 2: return new ValueTexture1D<2>(value);
+ case 3: return new ValueTexture1D<3>(value);
+ case 4: return new ValueTexture1D<4>(value);
+ default: break;
+ }
}
break;
}
@@ -388,7 +430,7 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int
auto type = typeLayout->getType();
auto shape = type->getResourceShape();
- //auto access = type->getResourceAccess();
+ auto access = type->getResourceAccess();
auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK;
switch (baseShape)
@@ -407,22 +449,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType();
- int count = 1;
+ Index count = 1;
if (typeReflection->getKind() == slang::TypeReflection::Kind::Vector)
{
- count = int(typeReflection->getElementCount());
+ count = Index(typeReflection->getElementCount());
}
switch (srcEntry.textureDesc.content)
{
case InputTextureContent::One:
{
- value->m_target = _newValueTexture(shape, count, 1.0f);
+ value->m_target = _newValueTexture(shape, access, count, 1.0f);
break;
}
case InputTextureContent::Zero:
{
- value->m_target = _newValueTexture(shape, count, 0.0f);
+ value->m_target = _newValueTexture(shape, access, count, 0.0f);
break;
}
default: break;
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index bce98c1cb..b21b22b30 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -28,13 +28,13 @@ SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0
#define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; }
-class CUDAResource : public CUDAComputeUtil::ResourceBase
+class MemoryCUDAResource : public CUDAResource
{
public:
- typedef CUDAComputeUtil::ResourceBase Super;
+ typedef CUDAResource Super;
/// Dtor
- ~CUDAResource()
+ ~MemoryCUDAResource()
{
if (m_cudaMemory)
{
@@ -42,27 +42,31 @@ public:
}
}
- static CUDAResource* getCUDAResource(BindSet::Value* value)
+ static MemoryCUDAResource* asResource(BindSet::Value* value)
{
- return value ? dynamic_cast<CUDAResource*>(value->m_target.Ptr()) : nullptr;
+ return value ? dynamic_cast<MemoryCUDAResource*>(value->m_target.Ptr()) : nullptr;
}
- /// Helper function to get the cuda memory pointer when given a value
+ /// Helper function to get the CUDA memory pointer when given a value
static CUdeviceptr getCUDAData(BindSet::Value* value)
{
- auto resource = getCUDAResource(value);
+ auto resource = asResource(value);
return resource ? resource->m_cudaMemory : CUdeviceptr();
}
CUdeviceptr m_cudaMemory = CUdeviceptr();
};
-class CUDATextureResource : public CUDAComputeUtil::ResourceBase
+class TextureCUDAResource : public CUDAResource
{
public:
- typedef CUDAComputeUtil::ResourceBase Super;
+ typedef CUDAResource Super;
- ~CUDATextureResource()
+ ~TextureCUDAResource()
{
+ if (m_cudaSurfObj)
+ {
+ SLANG_CUDA_ASSERT_ON_FAIL(cuSurfObjectDestroy(m_cudaSurfObj));
+ }
if (m_cudaTexObj)
{
SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj));
@@ -77,20 +81,30 @@ public:
}
}
- static CUDATextureResource* getCUDATextureResource(BindSet::Value* value)
+ static TextureCUDAResource* asResource(BindSet::Value* value)
{
- return value ? dynamic_cast<CUDATextureResource*>(value->m_target.Ptr()) : nullptr;
+ return value ? dynamic_cast<TextureCUDAResource*>(value->m_target.Ptr()) : nullptr;
}
- static CUtexObject getCUDATexObject(BindSet::Value* value)
+ static CUtexObject getTexObject(BindSet::Value* value)
{
- auto resource = getCUDATextureResource(value);
+ auto resource = asResource(value);
// It's an assumption here that 0 is okay for null. Seems to work...
return resource ? resource->m_cudaTexObj : CUtexObject(0);
}
- // This is an opaque type, that's backed by a long long
+ static CUsurfObject getSurfObject(BindSet::Value* value)
+ {
+ auto resource = asResource(value);
+ return resource ? resource->m_cudaSurfObj : CUsurfObject(0);
+ }
+
+ // The texObject is for reading 'texture' like things. This is an opaque type, that's backed by a long long
CUtexObject m_cudaTexObj = CUtexObject();
+
+ // The surfObj is for reading/writing 'texture like' things, but not for sampling.
+ CUsurfObject m_cudaSurfObj = CUsurfObject();
+
CUarray m_cudaArray = CUarray();
CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray();
};
@@ -335,20 +349,42 @@ public:
return SLANG_SUCCEEDED(context.init(0));
}
-/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource)
+static bool _hasReadAccess(SlangResourceAccess access) // True when `access` is READ or READ_WRITE
+{
+ return access == SLANG_RESOURCE_ACCESS_READ || access == SLANG_RESOURCE_ACCESS_READ_WRITE; // compare, don't assign: `access = ...` overwrote the parameter instead of testing it
+}
+
+static bool _hasWriteAccess(SlangResourceAccess access) // True only for READ_WRITE; no other access value is treated as writable here
+{
+ return access == SLANG_RESOURCE_ACCESS_READ_WRITE;
+}
+
+/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource)
{
auto type = typeLayout->getType();
auto shape = type->getResourceShape();
auto access = type->getResourceAccess();
+ if (!(access == SLANG_RESOURCE_ACCESS_READ ||
+ access == SLANG_RESOURCE_ACCESS_READ_WRITE))
+ {
+ SLANG_ASSERT(!"Only read or read write currently supported");
+ return SLANG_FAIL;
+ }
+
CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY;
auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK;
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType();
- const auto& textureDesc = srcEntry.textureDesc;
+ InputTextureDesc textureDesc = srcEntry.textureDesc;
+ if (_hasWriteAccess(access))
+ {
+ textureDesc.mipMapCount = 1;
+ }
+
// CUDA wants the unused dimensions to be 0.
// Might need to specially handle elsewhere
int width = textureDesc.size;
@@ -384,13 +420,13 @@ public:
return SLANG_FAIL;
}
}
-
+
TextureData texData;
generateTextureData(texData, textureDesc);
auto mipLevels = texData.mipLevels;
- RefPtr<CUDATextureResource> tex = new CUDATextureResource;
+ RefPtr<TextureCUDAResource> tex = new TextureCUDAResource;
size_t elementSize = 0;
@@ -486,6 +522,11 @@ public:
arrayDesc.Format = format;
arrayDesc.NumChannels = numChannels;
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP;
+ }
+
SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc));
}
else if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE)
@@ -553,7 +594,6 @@ public:
}
SLANG_ASSERT(dstArray);
-
// Check using the desc to see if it's plausible
{
CUDA_ARRAY_DESCRIPTOR arrayDesc;
@@ -710,15 +750,25 @@ public:
resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray;
}
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC));
- texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP;
- texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP;
- texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP;
- texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+ if (_hasWriteAccess(access))
+ {
+ // If has write access it's effectively UAV, and so doesn't have sampling available
+ SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc));
+ }
+ else
+ {
+ // If read only it's a SRV and can sample, but cannot write
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC));
+ texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr));
+ }
- SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr));
}
outResource = tex;
@@ -782,7 +832,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case slang::TypeReflection::Kind::ParameterBlock:
{
// We can construct the buffers. We can't copy into yet, as we need to set all of the bindings first
- RefPtr<CUDAResource> resource = new CUDAResource;
+ RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource;
SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes));
value->m_target = resource;
break;
@@ -801,7 +851,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case SLANG_TEXTURE_3D:
case SLANG_TEXTURE_CUBE:
{
- RefPtr<CUDAComputeUtil::ResourceBase> resource;
+ RefPtr<CUDAResource> resource;
SLANG_RETURN_ON_FAIL(CUDAComputeUtil::createTextureResource(entries[value->m_userIndex], typeLayout, resource));
value->m_target = resource;
break;
@@ -817,7 +867,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case SLANG_STRUCTURED_BUFFER:
{
// On CPU we just use the memory in the BindSet buffer, so don't need to create anything
- RefPtr<CUDAResource> resource = new CUDAResource;
+ RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource;
SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes));
value->m_target = resource;
break;
@@ -853,7 +903,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
if (elementCount == 0)
{
CUDAComputeUtil::Array array = { CUdeviceptr(), 0 };
- auto resource = CUDAResource::getCUDAResource(value);
+ auto resource = MemoryCUDAResource::asResource(value);
if (resource)
{
array.data = resource->m_cudaMemory;
@@ -868,7 +918,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case slang::TypeReflection::Kind::ParameterBlock:
{
// These map down to just pointers
- *location.getUniform<CUdeviceptr>() = CUDAResource::getCUDAData(value);
+ *location.getUniform<CUdeviceptr>() = MemoryCUDAResource::getCUDAData(value);
break;
}
case slang::TypeReflection::Kind::Resource:
@@ -876,14 +926,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
auto type = typeLayout->getType();
auto shape = type->getResourceShape();
- //auto access = type->getResourceAccess();
+ auto access = type->getResourceAccess();
switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK)
{
case SLANG_STRUCTURED_BUFFER:
{
CUDAComputeUtil::StructuredBuffer buffer = { CUdeviceptr(), 0 };
- auto resource = CUDAResource::getCUDAResource(value);
+ auto resource = MemoryCUDAResource::asResource(value);
if (resource)
{
buffer.data = resource->m_cudaMemory;
@@ -897,7 +947,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
{
CUDAComputeUtil::ByteAddressBuffer buffer = { CUdeviceptr(), 0 };
- auto resource = CUDAResource::getCUDAResource(value);
+ auto resource = MemoryCUDAResource::asResource(value);
if (resource)
{
buffer.data = resource->m_cudaMemory;
@@ -912,7 +962,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case SLANG_TEXTURE_3D:
case SLANG_TEXTURE_CUBE:
{
- *location.getUniform<CUtexObject>() = CUDATextureResource::getCUDATexObject(value);
+ if (_hasWriteAccess(access))
+ {
+ *location.getUniform<CUsurfObject>() = TextureCUDAResource::getSurfObject(value);
+ }
+ else
+ {
+ *location.getUniform<CUtexObject>() = TextureCUDAResource::getTexObject(value);
+ }
break;
}
@@ -929,7 +986,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
const auto& values = bindSet.getValues();
for (BindSet::Value* value : values)
{
- CUdeviceptr cudaMem = CUDAResource::getCUDAData(value);
+ CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value);
if (value && value->m_data && cudaMem)
{
// Okay copy the data over...
@@ -950,8 +1007,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
SLANG_CUDA_RETURN_ON_FAIL(cuFuncGetAttribute(&sharedSizeInBytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel));
// Work out the args
- CUdeviceptr uniformCUDAData = CUDAResource::getCUDAData(bindRoot.getRootValue());
- CUdeviceptr entryPointCUDAData = CUDAResource::getCUDAData(bindRoot.getEntryPointValue());
+ CUdeviceptr uniformCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getRootValue());
+ CUdeviceptr entryPointCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getEntryPointValue());
// NOTE! These are pointers to the cuda memory pointers
void* args[] = { &entryPointCUDAData , &uniformCUDAData };
@@ -987,7 +1044,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
if (entry.isOutput)
{
// Copy back to CPU memory
- CUdeviceptr cudaMem = CUDAResource::getCUDAData(value);
+ CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value);
if (value && value->m_data && cudaMem)
{
// Okay copy the data back...
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
index 58a5bba33..f15c9d4e3 100644
--- a/tools/render-test/cuda/cuda-compute-util.h
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -8,10 +8,16 @@
namespace renderer_test {
+// Common base class for CUDA-side resources, covering texture resources as
+// well as plain memory allocations. It declares no members of its own.
+class CUDAResource : public RefObject
+{
+public:
+};
struct CUDAComputeUtil
{
- // Define here, so we don't need to include the cude header
+ // Define here, so we don't need to include the CUDA header
typedef size_t CUdeviceptr;
/// NOTE! MUST match up to definitions in the CUDA prelude
@@ -40,12 +46,7 @@ struct CUDAComputeUtil
List<BindSet::Value*> m_buffers;
};
- class ResourceBase : public RefObject
- {
- public:
- };
-
- static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource);
+ static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource);
static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext);
diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp
index 108483a2a..f9d6a60e1 100644
--- a/tools/render-test/shader-input-layout.cpp
+++ b/tools/render-test/shader-input-layout.cpp
@@ -452,6 +452,12 @@ namespace renderer_test
entry.textureDesc.format = format;
entry.bufferDesc.format = format;
}
+ else if(word == "mipMaps")
+ {
+ parser.Read("=");
+ entry.textureDesc.mipMapCount = int(parser.ReadInt());
+ }
+
if (parser.LookAhead(","))
parser.Read(",");
else
@@ -974,7 +980,12 @@ namespace renderer_test
arraySize *= 6;
output.arraySize = arraySize;
output.textureSize = inputDesc.size;
- output.mipLevels = Math::Log2Floor(output.textureSize) + 1;
+
+ const Index maxMipLevels = Math::Log2Floor(output.textureSize) + 1;
+ Index mipLevels = (inputDesc.mipMapCount <= 0) ? maxMipLevels : inputDesc.mipMapCount;
+ mipLevels = (mipLevels > maxMipLevels) ? maxMipLevels : mipLevels;
+
+ output.mipLevels = int(mipLevels);
output.dataBuffer.setCount(output.mipLevels * output.arraySize);
int slice = 0;
diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h
index a9d525d47..0831f73bb 100644
--- a/tools/render-test/shader-input-layout.h
+++ b/tools/render-test/shader-input-layout.h
@@ -33,6 +33,7 @@ struct InputTextureDesc
bool isDepthTexture = false;
bool isRWTexture = false;
int size = 4;
+ int mipMapCount = 0; ///< 0 means the maximum number of mips will be bound
Format format = Format::RGBA_Unorm_UInt8;