summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
author jsmall-nvidia <jsmall@nvidia.com> 2020-02-18 14:14:16 -0500
committer GitHub <noreply@github.com> 2020-02-18 14:14:16 -0500
commit 8ee39e08c48a315163fe1850dbb12ca292020d4d (patch)
tree 5041064a194849399aa587ac13b46db2088bdb05 /tools
parent e109985375712b449d365450b3d3e39416a171ce (diff)
First pass Texture Array support on CUDA/CPU (#1225)
* Add cubemap support. * Add CUDA fence intrinsics. * Added Gather for CUDA. * Use the CUDA driver API as much as possible. * * Support 1D texture on CPU * WIP on 1D texture on CUDA * Added simplified texture test * Fix test. * Improve texture-simple tests. * * Add CPU support for 3D textures * Add support for mip maps to CUDA * Disable warnings in nvrtc * Update CUDA docs * WIP on 3D texture support. * Add support for 3D textures for CPU and CUDA. * CPU and CUDA support for cube maps. * Add CPU support for Texture1DArray. * Support CUDA Layered/Array type in meta library.
Diffstat (limited to 'tools')
-rw-r--r-- tools/render-test/cpu-compute-util.cpp 107
-rw-r--r-- tools/render-test/cuda/cuda-compute-util.cpp 52
2 files changed, 143 insertions, 16 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index d69521e66..7be1a6c88 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -16,6 +16,36 @@ namespace renderer_test {
using namespace Slang;
template <int COUNT>
+struct ValueTextureCube : public CPUComputeUtil::Resource, public CPPPrelude::ITextureCube
+{
+ void set(void* out)
+ {
+ float* dst = (float*)out;
+ for (int i = 0; i < COUNT; ++i)
+ {
+ dst[i] = m_value;
+ }
+ }
+
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+
+ ValueTextureCube(float value) :
+ m_value(value)
+ {
+ m_interface = static_cast<CPPPrelude::ITextureCube*>(this);
+ }
+
+ float m_value;
+};
+
+template <int COUNT>
struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D
{
void set(void* out)
@@ -118,10 +148,43 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex
};
+template <int COUNT>
+struct ValueTexture1DArray : public CPUComputeUtil::Resource, public CPPPrelude::ITexture1DArray
+{
+ void set(void* out)
+ {
+ float* dst = (float*)out;
+ for (int i = 0; i < COUNT; ++i)
+ {
+ dst[i] = m_value;
+ }
+ }
-static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value)
+ virtual void Load(const CPPPrelude::int3& v, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, float level, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+
+ ValueTexture1DArray(float value) :
+ m_value(value)
+ {
+ m_interface = static_cast<CPPPrelude::ITexture1DArray*>(this);
+ }
+
+ float m_value;
+};
+
+static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value)
{
- switch (baseShape)
+ switch (shape)
{
case SLANG_TEXTURE_1D:
{
@@ -157,6 +220,30 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
default: break;
}
}
+ case SLANG_TEXTURE_CUBE:
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTextureCube<1>(value);
+ case 2: return new ValueTextureCube<2>(value);
+ case 3: return new ValueTextureCube<3>(value);
+ case 4: return new ValueTextureCube<4>(value);
+ default: break;
+ }
+ }
+ case SLANG_TEXTURE_1D_ARRAY:
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTexture1DArray<1>(value);
+ case 2: return new ValueTexture1DArray<2>(value);
+ case 3: return new ValueTexture1DArray<3>(value);
+ case 4: return new ValueTexture1DArray<4>(value);
+ default: break;
+ }
+ break;
+ }
+
default: break;
}
return nullptr;
@@ -224,10 +311,9 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
SLANG_ASSERT(value->m_userIndex >= 0);
auto& srcEntry = layout.entries[value->m_userIndex];
-
- // TODO(JS):
- // We should use the srcEntry to determine what data to store in the texture,
- // it's dimensions etc. For now we just support it being 1.
+ // TODO(JS): Currently we only support textures whose content is either
+ // 0 or 1, because this is easy to implement.
+ // We will need to do something better in the future.
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType();
@@ -241,12 +327,12 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
{
case InputTextureContent::One:
{
- value->m_target = _newValueTexture(baseShape, count, 1.0f);
+ value->m_target = _newValueTexture(shape, count, 1.0f);
break;
}
case InputTextureContent::Zero:
{
- value->m_target = _newValueTexture(baseShape, count, 0.0f);
+ value->m_target = _newValueTexture(shape, count, 0.0f);
break;
}
default: break;
@@ -335,13 +421,14 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
assert(!"unhandled case");
break;
case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_2D:
case SLANG_TEXTURE_3D:
case SLANG_TEXTURE_CUBE:
case SLANG_TEXTURE_BUFFER:
- case SLANG_TEXTURE_2D:
{
Resource* targetResource = value ? static_cast<Resource*>(value->m_target.Ptr()) : nullptr;
- *location.getUniform<void*>() = targetResource ? targetResource->getInterface() : nullptr;
+ void* intf = targetResource ? targetResource->getInterface() : nullptr;
+ *location.getUniform<void*>() = intf;
break;
}
case SLANG_STRUCTURED_BUFFER:
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 59b5e65f6..779cfc96a 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -528,6 +528,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
memset(&arrayDesc, 0, sizeof(arrayDesc));
+ // If we have a cubemap the depth is 6
arrayDesc.Depth = depth;
arrayDesc.Height = height;
arrayDesc.Width = width;
@@ -536,6 +537,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
arrayDesc.Flags = 0;
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ arrayDesc.Depth = 6;
+ arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP;
+ }
+
SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc));
}
else
@@ -554,6 +561,9 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
}
+ // Work space for holding data for uploading if it needs to be rearranged
+ List<uint8_t> workspace;
+
for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel)
{
int mipWidth = width >> mipLevel;
@@ -564,6 +574,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
mipHeight = (mipHeight == 0) ? 1 : mipHeight;
mipDepth = (mipDepth == 0) ? 1 : mipDepth;
+ // If it's a cubemap then the depth is always 6
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ mipDepth = 6;
+ }
+
auto dstArray = tex->m_cudaArray;
if (tex->m_cudaMipMappedArray)
{
@@ -572,9 +588,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
SLANG_ASSERT(dstArray);
- const auto& srcData = texData.dataBuffer[mipLevel];
-
- SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
// Check using the desc to see if it's plausible
{
@@ -582,7 +595,34 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray));
SLANG_ASSERT(mipWidth == arrayDesc.Width);
- SLANG_ASSERT(mipHeight == arrayDesc.Height);
+ SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0));
+ }
+
+ const void* srcDataPtr = nullptr;
+
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ size_t faceSizeInBytes = elementSize * mipWidth * mipHeight;
+
+ workspace.setCount(faceSizeInBytes * 6);
+
+ // Copy the data over to make contiguous
+ for (Index j = 0; j < 6; j++)
+ {
+ const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel];
+ SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount());
+
+ ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes);
+ }
+
+ srcDataPtr = workspace.getBuffer();
+ }
+ else
+ {
+ const auto& srcData = texData.dataBuffer[mipLevel];
+ SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
+
+ srcDataPtr = srcData.getBuffer();
}
switch (baseShape)
@@ -595,7 +635,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
copyParam.dstArray = dstArray;
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcHost = srcDataPtr;
copyParam.srcPitch = mipWidth * elementSize;
copyParam.WidthInBytes = copyParam.srcPitch;
copyParam.Height = mipHeight;
@@ -612,7 +652,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
copyParam.dstArray = dstArray;
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcHost = srcDataPtr;
copyParam.srcPitch = mipWidth * elementSize;
copyParam.WidthInBytes = copyParam.srcPitch;
copyParam.Height = mipHeight;