summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/cuda-target.md1
-rw-r--r--prelude/slang-cpp-types.h32
-rw-r--r--source/slang/core.meta.slang79
-rw-r--r--source/slang/core.meta.slang.h81
-rw-r--r--source/slang/slang-emit-cuda.cpp44
-rw-r--r--tests/compute/texture-simple.slang8
-rw-r--r--tests/compute/texture-simple.slang.expected.txt8
-rw-r--r--tools/render-test/cpu-compute-util.cpp107
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp52
9 files changed, 321 insertions, 91 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md
index 79251251b..743e9830c 100644
--- a/docs/cuda-target.md
+++ b/docs/cuda-target.md
@@ -17,6 +17,7 @@ These limitations apply to Slang transpiling to CUDA.
* Only supports the 'texture object' style binding (The texture object API is only supported on devices of compute capability 3.0 or higher. )
* Samplers are not separate objects in CUDA - they are combined into a single 'TextureObject'. So samplers are effectively ignored on CUDA targets.
* Whilst there is tex1Dfetch there are no equivalents for higher dimensions - so such accesses are not currently supported
+* When using a TextureArray (layered texture in CUDA) - the index will be treated as an int, as this is all CUDA allows
The following are a work in progress or not implemented but are planned to be so in the future
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 936233afc..4c6848b9f 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -279,6 +279,38 @@ struct Texture3D
ITexture3D* texture;
};
+struct ITextureCube
+{
+ virtual void Sample(SamplerState samplerState, const float3& loc, void* out) = 0;
+ virtual void SampleLevel(SamplerState samplerState, const float3& loc, float level, void* out) = 0;
+};
+
+template <typename T>
+struct TextureCube
+{
+ T Sample(SamplerState samplerState, const float3& v) const { T out; texture->Sample(samplerState, v, &out); return out; }
+ T SampleLevel(SamplerState samplerState, const float3& v, float level) { T out; texture->SampleLevel(samplerState, v, level, &out); return out; }
+
+ ITextureCube* texture;
+};
+
+struct ITexture1DArray
+{
+ virtual void Load(const int3& v, void* out) = 0;
+ virtual void Sample(SamplerState samplerState, const float2& loc, void* out) = 0;
+ virtual void SampleLevel(SamplerState samplerState, const float2& loc, float level, void* out) = 0;
+};
+
+template <typename T>
+struct Texture1DArray
+{
+ T Load(const int3& v) const { T out; texture->Load(v, &out); return out; }
+ T Sample(SamplerState samplerState, const float2& v) const { T out; texture->Sample(samplerState, v, &out); return out; }
+ T SampleLevel(SamplerState samplerState, const float2& v, float level) { T out; texture->SampleLevel(samplerState, v, level, &out); return out; }
+
+ ITexture1DArray* texture;
+};
+
/* Varying input for Compute */
/* Used when running a single thread */
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 450cc4512..ec1a3ed0b 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -894,22 +894,47 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n";
- if( baseShape != TextureFlavor::Shape::ShapeCube )
+ // CUDA
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0";
- for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ const int coordCount = kBaseTextureTypes[tt].coordCount;
+ const int vecCount = coordCount + int(isArray);
+
+ if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << ", ($2)";
- if (kBaseTextureTypes[tt].coordCount > 1)
+ sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
+ if (isArray)
{
- sb << '.' << char(i + 'x');
+ sb << "Layered";
}
+ sb << "<$T0>($0";
+ for (int i = 0; i < coordCount; ++i)
+ {
+ sb << ", ($2)";
+ if (vecCount > 1)
+ {
+ sb << '.' << char(i + 'x');
+ }
+ }
+ if (isArray)
+ {
+ sb << ", int(($2)." << char(coordCount + 'x') << ")";
+ }
+ sb << ")\")\n";
+ }
+ else
+ {
+ sb << "__target_intrinsic(cuda, \"texCubemap";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "<$T0>($0, ($2).x, ($2).y, ($2).z";
+ if (isArray)
+ {
+ sb << ", int(($2).w)";
+ }
+ sb << ")\")\n";
}
- sb << ")\")\n";
- }
- else
- {
- sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n";
}
sb << "T Sample(SamplerState s, ";
@@ -939,7 +964,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
}
sb << "float clamp, out uint status);\n";
-
// `SampleBias()`
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n";
sb << "T SampleBias(SamplerState s, ";
@@ -1054,24 +1078,45 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n";
// CUDA
- if (!isArray)
{
+ const int coordCount = kBaseTextureTypes[tt].coordCount;
+ const int vecCount = coordCount + int(isArray);
+
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0";
- for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "Lod<$T0>($0";
+ for (int i = 0; i < coordCount; ++i)
{
sb << ", ($2)";
- if (kBaseTextureTypes[tt].coordCount > 1)
+ if (vecCount > 1)
{
sb << '.' << char(i + 'x');
}
}
+ if (isArray)
+ {
+ sb << ", int(($2)." << char(coordCount + 'x') << ")";
+ }
sb << ", $3)\")\n";
}
else
{
- sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n";
+ sb << "__target_intrinsic(cuda, \"texCubemap";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z";
+ if (isArray)
+ {
+ sb << ", int(($2).w)";
+ }
+ sb << ", $3)\")\n";
}
}
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index cca8f2e51..a8ad43965 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -915,22 +915,47 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n";
- if( baseShape != TextureFlavor::Shape::ShapeCube )
+ // CUDA
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0";
- for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ const int coordCount = kBaseTextureTypes[tt].coordCount;
+ const int vecCount = coordCount + int(isArray);
+
+ if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << ", ($2)";
- if (kBaseTextureTypes[tt].coordCount > 1)
+ sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
+ if (isArray)
{
- sb << '.' << char(i + 'x');
+ sb << "Layered";
}
+ sb << "<$T0>($0";
+ for (int i = 0; i < coordCount; ++i)
+ {
+ sb << ", ($2)";
+ if (vecCount > 1)
+ {
+ sb << '.' << char(i + 'x');
+ }
+ }
+ if (isArray)
+ {
+ sb << ", int(($2)." << char(coordCount + 'x') << ")";
+ }
+ sb << ")\")\n";
+ }
+ else
+ {
+ sb << "__target_intrinsic(cuda, \"texCubemap";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "<$T0>($0, ($2).x, ($2).y, ($2).z";
+ if (isArray)
+ {
+ sb << ", int(($2).w)";
+ }
+ sb << ")\")\n";
}
- sb << ")\")\n";
- }
- else
- {
- sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n";
}
sb << "T Sample(SamplerState s, ";
@@ -960,7 +985,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
}
sb << "float clamp, out uint status);\n";
-
// `SampleBias()`
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n";
sb << "T SampleBias(SamplerState s, ";
@@ -1075,24 +1099,45 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n";
// CUDA
- if (!isArray)
{
+ const int coordCount = kBaseTextureTypes[tt].coordCount;
+ const int vecCount = coordCount + int(isArray);
+
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0";
- for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "Lod<$T0>($0";
+ for (int i = 0; i < coordCount; ++i)
{
sb << ", ($2)";
- if (kBaseTextureTypes[tt].coordCount > 1)
+ if (vecCount > 1)
{
sb << '.' << char(i + 'x');
}
}
+ if (isArray)
+ {
+ sb << ", int(($2)." << char(coordCount + 'x') << ")";
+ }
sb << ", $3)\")\n";
}
else
{
- sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n";
+ sb << "__target_intrinsic(cuda, \"texCubemap";
+ if (isArray)
+ {
+ sb << "Layered";
+ }
+ sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z";
+ if (isArray)
+ {
+ sb << ", int(($2).w)";
+ }
+ sb << ", $3)\")\n";
}
}
@@ -1314,7 +1359,7 @@ for (auto op : binaryOps)
sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
}
}
-SLANG_RAW("#line 1296 \"core.meta.slang\"")
+SLANG_RAW("#line 1341 \"core.meta.slang\"")
SLANG_RAW("\n")
SLANG_RAW("\n")
SLANG_RAW("// Specialized function\n")
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 0bbaafa5b..262a67784 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -81,57 +81,29 @@ static bool _isSingleNameBasicType(IROp op)
SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName)
{
- // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
-
// Not clear how to do this yet
- if (texType->isMultisample() || texType->isArray())
+ if (texType->isMultisample())
{
return SLANG_FAIL;
}
- outName << "CUtexObject";
-
-#if 0
- outName << "texture<";
- outName << _getTypeName(texType->getElementType());
- outName << ", ";
-
- switch (texType->GetBaseShape())
- {
- case TextureFlavor::Shape::Shape1D: outName << "cudaTextureType1D"; break;
- case TextureFlavor::Shape::Shape2D: outName << "cudaTextureType2D"; break;
- case TextureFlavor::Shape::Shape3D: outName << "cudaTextureType3D"; break;
- case TextureFlavor::Shape::ShapeCube: outName << "cudaTextureTypeCubemap"; break;
- case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break;
- default:
- SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape");
- return SLANG_FAIL;
- }
-
- outName << ", ";
-
switch (texType->getAccess())
{
case SLANG_RESOURCE_ACCESS_READ:
{
- // Other value is cudaReadModeNormalizedFloat
-
- outName << "cudaReadModeElementType";
- break;
+ outName << "CUtexObject";
+ return SLANG_OK;
}
- default:
+ case SLANG_RESOURCE_ACCESS_READ_WRITE:
{
- SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode");
- return SLANG_FAIL;
+ outName << "CUsurfObject";
+ return SLANG_OK;
}
+ default: break;
}
-
- outName << ">";
-#endif
- return SLANG_OK;
+ return SLANG_FAIL;
}
-
SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder)
{
typedef HLSLIntrinsic::Op Op;
diff --git a/tests/compute/texture-simple.slang b/tests/compute/texture-simple.slang
index e79a26885..3d8fe8619 100644
--- a/tests/compute/texture-simple.slang
+++ b/tests/compute/texture-simple.slang
@@ -12,6 +12,11 @@ Texture1D<float> t1D;
Texture2D<float> t2D;
//TEST_INPUT: Texture3D(size=4, content = one):name t3D
Texture3D<float> t3D;
+//TEST_INPUT: TextureCube(size=4, content = one):name tCube
+TextureCube<float> tCube;
+
+//TEST_INPUT: Texture1D(size=4, content = one, arrayLength=2):name t1DArray
+Texture1DArray<float> t1DArray;
//TEST_INPUT: Sampler:name samplerState
SamplerState samplerState;
@@ -29,6 +34,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
val += t1D.SampleLevel(samplerState, u, 0);
val += t2D.SampleLevel(samplerState, float2(u, u), 0);
val += t3D.SampleLevel(samplerState, float3(u, u, u), 0);
+ val += tCube.SampleLevel(samplerState, normalize(float3(u, 1 - u, u)), 0);
+
+ val += t1DArray.SampleLevel(samplerState, float2(u, 0), 0);
outputBuffer[idx] = val;
}
diff --git a/tests/compute/texture-simple.slang.expected.txt b/tests/compute/texture-simple.slang.expected.txt
index e54af3bc8..a10701b2e 100644
--- a/tests/compute/texture-simple.slang.expected.txt
+++ b/tests/compute/texture-simple.slang.expected.txt
@@ -1,4 +1,4 @@
-40400000
-40400000
-40400000
-40400000
+40A00000
+40A00000
+40A00000
+40A00000
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index d69521e66..7be1a6c88 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -16,6 +16,36 @@ namespace renderer_test {
using namespace Slang;
template <int COUNT>
+struct ValueTextureCube : public CPUComputeUtil::Resource, public CPPPrelude::ITextureCube
+{
+ void set(void* out)
+ {
+ float* dst = (float*)out;
+ for (int i = 0; i < COUNT; ++i)
+ {
+ dst[i] = m_value;
+ }
+ }
+
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+
+ ValueTextureCube(float value) :
+ m_value(value)
+ {
+ m_interface = static_cast<CPPPrelude::ITextureCube*>(this);
+ }
+
+ float m_value;
+};
+
+template <int COUNT>
struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D
{
void set(void* out)
@@ -118,10 +148,43 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex
};
+template <int COUNT>
+struct ValueTexture1DArray : public CPUComputeUtil::Resource, public CPPPrelude::ITexture1DArray
+{
+ void set(void* out)
+ {
+ float* dst = (float*)out;
+ for (int i = 0; i < COUNT; ++i)
+ {
+ dst[i] = m_value;
+ }
+ }
-static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value)
+ virtual void Load(const CPPPrelude::int3& v, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+ virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, float level, void* out) SLANG_OVERRIDE
+ {
+ set(out);
+ }
+
+ ValueTexture1DArray(float value) :
+ m_value(value)
+ {
+ m_interface = static_cast<CPPPrelude::ITexture1DArray*>(this);
+ }
+
+ float m_value;
+};
+
+static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value)
{
- switch (baseShape)
+ switch (shape)
{
case SLANG_TEXTURE_1D:
{
@@ -157,6 +220,30 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
default: break;
}
}
+ case SLANG_TEXTURE_CUBE:
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTextureCube<1>(value);
+ case 2: return new ValueTextureCube<2>(value);
+ case 3: return new ValueTextureCube<3>(value);
+ case 4: return new ValueTextureCube<4>(value);
+ default: break;
+ }
+ }
+ case SLANG_TEXTURE_1D_ARRAY:
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTexture1DArray<1>(value);
+ case 2: return new ValueTexture1DArray<2>(value);
+ case 3: return new ValueTexture1DArray<3>(value);
+ case 4: return new ValueTexture1DArray<4>(value);
+ default: break;
+ }
+ break;
+ }
+
default: break;
}
return nullptr;
@@ -224,10 +311,9 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
SLANG_ASSERT(value->m_userIndex >= 0);
auto& srcEntry = layout.entries[value->m_userIndex];
-
- // TODO(JS):
- // We should use the srcEntry to determine what data to store in the texture,
- // it's dimensions etc. For now we just support it being 1.
+ // TODO(JS): Currently we support only textures whose content is either
+ // 0 or 1. This is because this is easy to implement.
+ // Will need to do something better in the future..
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType();
@@ -241,12 +327,12 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
{
case InputTextureContent::One:
{
- value->m_target = _newValueTexture(baseShape, count, 1.0f);
+ value->m_target = _newValueTexture(shape, count, 1.0f);
break;
}
case InputTextureContent::Zero:
{
- value->m_target = _newValueTexture(baseShape, count, 0.0f);
+ value->m_target = _newValueTexture(shape, count, 0.0f);
break;
}
default: break;
@@ -335,13 +421,14 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
assert(!"unhandled case");
break;
case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_2D:
case SLANG_TEXTURE_3D:
case SLANG_TEXTURE_CUBE:
case SLANG_TEXTURE_BUFFER:
- case SLANG_TEXTURE_2D:
{
Resource* targetResource = value ? static_cast<Resource*>(value->m_target.Ptr()) : nullptr;
- *location.getUniform<void*>() = targetResource ? targetResource->getInterface() : nullptr;
+ void* intf = targetResource ? targetResource->getInterface() : nullptr;
+ *location.getUniform<void*>() = intf;
break;
}
case SLANG_STRUCTURED_BUFFER:
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 59b5e65f6..779cfc96a 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -528,6 +528,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
memset(&arrayDesc, 0, sizeof(arrayDesc));
+ // If we have a cubemap the depth is 6
arrayDesc.Depth = depth;
arrayDesc.Height = height;
arrayDesc.Width = width;
@@ -536,6 +537,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
arrayDesc.Flags = 0;
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ arrayDesc.Depth = 6;
+ arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP;
+ }
+
SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc));
}
else
@@ -554,6 +561,9 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
}
+ // Work space for holding data for uploading if it needs to be rearranged
+ List<uint8_t> workspace;
+
for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel)
{
int mipWidth = width >> mipLevel;
@@ -564,6 +574,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
mipHeight = (mipHeight == 0) ? 1 : mipHeight;
mipDepth = (mipDepth == 0) ? 1 : mipDepth;
+ // If it's a cubemap then the depth is always 6
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ mipDepth = 6;
+ }
+
auto dstArray = tex->m_cudaArray;
if (tex->m_cudaMipMappedArray)
{
@@ -572,9 +588,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
SLANG_ASSERT(dstArray);
- const auto& srcData = texData.dataBuffer[mipLevel];
-
- SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
// Check using the desc to see if it's plausible
{
@@ -582,7 +595,34 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray));
SLANG_ASSERT(mipWidth == arrayDesc.Width);
- SLANG_ASSERT(mipHeight == arrayDesc.Height);
+ SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0));
+ }
+
+ const void* srcDataPtr = nullptr;
+
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ size_t faceSizeInBytes = elementSize * mipWidth * mipHeight;
+
+ workspace.setCount(faceSizeInBytes * 6);
+
+ // Copy the data over to make contiguous
+ for (Index j = 0; j < 6; j++)
+ {
+ const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel];
+ SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount());
+
+ ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes);
+ }
+
+ srcDataPtr = workspace.getBuffer();
+ }
+ else
+ {
+ const auto& srcData = texData.dataBuffer[mipLevel];
+ SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
+
+ srcDataPtr = srcData.getBuffer();
}
switch (baseShape)
@@ -595,7 +635,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
copyParam.dstArray = dstArray;
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcHost = srcDataPtr;
copyParam.srcPitch = mipWidth * elementSize;
copyParam.WidthInBytes = copyParam.srcPitch;
copyParam.Height = mipHeight;
@@ -612,7 +652,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
copyParam.dstArray = dstArray;
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcHost = srcDataPtr;
copyParam.srcPitch = mipWidth * elementSize;
copyParam.WidthInBytes = copyParam.srcPitch;
copyParam.Height = mipHeight;