summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-01-24 15:06:08 -0500
committerGitHub <noreply@github.com>2020-01-24 15:06:08 -0500
commitb8f294445b998eadb9b09e2b91eb462b881eaf2e (patch)
tree8607e5d2f6c2c2b4b7545a721d6d58e6e557e5c0
parent394983d61efa2bf99ba96aa68a47df8927a8a634 (diff)
Texture Sample available in CUDA (#1176)
* WIP: Trying to figure out how texturing will work with CUDA. * WIP: Fixes for CUDA layout. Initial CUDA texture test. * WIP: Outputs something compilable by CUDA for TextureND.Sample * 2d texture working with CUDA. * Fix how binding for SamplerState occurs in CUDA. * Small tidy up of comments.
-rw-r--r--docs/cuda-target.md4
-rw-r--r--prelude/slang-cuda-prelude.h11
-rw-r--r--source/core/slang-platform.cpp15
-rw-r--r--source/core/slang-platform.h3
-rw-r--r--source/slang/core.meta.slang19
-rw-r--r--source/slang/core.meta.slang.h21
-rw-r--r--source/slang/slang-emit-c-like.cpp4
-rw-r--r--source/slang/slang-emit-cpp.cpp35
-rw-r--r--source/slang/slang-emit-cuda.cpp11
-rw-r--r--source/slang/slang-type-layout.cpp22
-rw-r--r--tests/cuda/cuda-texture.slang22
-rw-r--r--tests/cuda/cuda-texture.slang.expected.txt4
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp146
13 files changed, 271 insertions, 46 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md
index db8c98f14..41fc98790 100644
--- a/docs/cuda-target.md
+++ b/docs/cuda-target.md
@@ -14,7 +14,7 @@ Slang has preliminary support for producing CUDA source, and PTX binaries using
These limitations apply to Slang transpiling to CUDA.
-* Only supports the 'texture object' style binding
+* Only supports the 'texture object' style binding (the texture object API is only supported on devices of compute capability 3.0 or higher).
* Samplers are not separate objects in CUDA - they are combined into a single 'TextureObject'. So samplers are effectively ignored on CUDA targets.
* Whilst there is tex1Dfetch there are no equivalents for higher dimensions - so such accesses are not currently supported
@@ -68,7 +68,7 @@ struct UniformEntryPointParams
struct UniformState
{
CUtexObject tex; // This is the combination of a texture and a sampler(!)
- //SamplerState sampler; // CUDA doesn't have separate sampler objects - so this is just ignored.
+ SamplerState sampler; // This variable exists within the layout, but its value is not used.
int32_t* outputBuffer; // Currently Structured buffers are converted to pointers - this will likely change in the future (for bounds checking and other reasons)
Thing* thing3; // Constant buffers map to pointers
};
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index f78814486..28e423b31 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -19,6 +19,17 @@ struct FixedArray
T m_data[SIZE];
};
+// Typically defined in cuda.h, but we can't ship/rely on that, so just define here
+typedef unsigned long long CUtexObject;
+typedef unsigned long long CUsurfObject;
+
+// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type,
+// backed as a pointer, to simplify code generation, with the downside that such a binding will take up
+// uniform space, even though it will have no effect.
+// TODO(JS): Consider ways to strip use of variables of this type so that they have no binding.
+struct SamplerStateUnused;
+typedef SamplerStateUnused* SamplerState;
+
// Code generator will generate the specific type
template <typename T, int ROWS, int COLS>
struct Matrix;
diff --git a/source/core/slang-platform.cpp b/source/core/slang-platform.cpp
index e735216f4..d02951e0b 100644
--- a/source/core/slang-platform.cpp
+++ b/source/core/slang-platform.cpp
@@ -1,4 +1,7 @@
// slang-platform.cpp
+
+#define _CRT_SECURE_NO_WARNINGS
+
#include "slang-platform.h"
#include "slang-common.h"
@@ -197,6 +200,18 @@ SLANG_COMPILE_TIME_ASSERT(E_OUTOFMEMORY == SLANG_E_OUT_OF_MEMORY);
#endif // _WIN32
+
+/* static */SlangResult PlatformUtil::getEnvironmentVariable(const UnownedStringSlice& name, StringBuilder& out) // Looks up environment variable `name`; on success appends its value to `out`
+{
+ const char* value = getenv(String(name).getBuffer()); // The slice is copied into a temporary String to get a buffer for getenv (presumably for zero termination - TODO confirm)
+ if (value)
+ {
+ out.append(value); // Appends to `out` - any existing contents of `out` are not cleared here
+ return SLANG_OK;
+ }
+ return SLANG_E_NOT_FOUND; // getenv returned null, so the variable is reported as not found
+}
+
/* static */PlatformKind PlatformUtil::getPlatformKind()
{
#if SLANG_WINRT
diff --git a/source/core/slang-platform.h b/source/core/slang-platform.h
index c3ad1c486..767e83c1d 100644
--- a/source/core/slang-platform.h
+++ b/source/core/slang-platform.h
@@ -128,6 +128,9 @@ namespace Slang
/// True if the kind is part of the family
static bool isFamily(PlatformFamily family, PlatformKind kind) { return (getPlatformFlags(family) & (PlatformFlags(1) << int(kind))) != 0; }
+ /// Given the name of an environment variable, appends its value to `out` and returns SLANG_OK.
+ /// Will return SLANG_E_NOT_FOUND if the variable is not set
+ static SlangResult getEnvironmentVariable(const UnownedStringSlice& name, StringBuilder& out);
};
#ifndef _MSC_VER
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 80ff09ea8..58e6e287c 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -770,6 +770,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "$1";
}
sb << ")$z\")\n";
+
}
sb << "T Load(";
sb << "int" << loadCoordCount << " location";
@@ -887,6 +888,24 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
// `Sample()`
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n";
+
+ if( baseShape != TextureFlavor::Shape::ShapeCube )
+ {
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0";
+ if (kBaseTextureTypes[tt].coordCount == 1)
+ {
+ sb << ", $2";
+ }
+ else
+ {
+ for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ {
+ sb << ", ($2)." << char(i + 'x');
+ }
+ }
+ sb << ")\")\n";
+ }
+
sb << "T Sample(SamplerState s, ";
sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index 27811b588..201429700 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -791,6 +791,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
sb << "$1";
}
sb << ")$z\")\n";
+
}
sb << "T Load(";
sb << "int" << loadCoordCount << " location";
@@ -908,6 +909,24 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
// `Sample()`
sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n";
+
+ if( baseShape != TextureFlavor::Shape::ShapeCube )
+ {
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0";
+ if (kBaseTextureTypes[tt].coordCount == 1)
+ {
+ sb << ", $2";
+ }
+ else
+ {
+ for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ {
+ sb << ", ($2)." << char(i + 'x');
+ }
+ }
+ sb << ")\")\n";
+ }
+
sb << "T Sample(SamplerState s, ";
sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
@@ -1258,7 +1277,7 @@ for (auto op : binaryOps)
sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
}
}
-SLANG_RAW("#line 1240 \"core.meta.slang\"")
+SLANG_RAW("#line 1259 \"core.meta.slang\"")
SLANG_RAW("\n")
SLANG_RAW("\n")
SLANG_RAW("// Specialized function\n")
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 10790567e..8e0c64d1a 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1250,6 +1250,10 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl(
SLANG_RELEASE_ASSERT(argCount > argIndex);
IRType* type = args[argIndex].get()->getDataType();
+ if (auto baseTextureType = as<IRTextureType>(type))
+ {
+ type = baseTextureType->getElementType();
+ }
IRBasicType* underlyingType = nullptr;
if (auto basicType = as<IRBasicType>(type))
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 5ea3c72a2..759f980d3 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1912,48 +1912,23 @@ void CPPSourceEmitter::emitIntrinsicCallExprImpl(
return;
}
- auto prec = getInfo(EmitOp::Postfix);
- needClose = maybeEmitParens(outerPrec, prec);
-
- if (name[0] == '.')
- {
- // Looks like a member function call
- emitOperand(args[0].get(), leftSide(outerPrec, prec));
- m_writer->emit(".");
-
- name = UnownedStringSlice(name.begin() + 1, name.end());
-
- args++;
- argCount--;
- }
- else
{
Op op = m_opLookup->getOpByName(name);
if (op != Op::Invalid)
{
-
+
// Work out the intrinsic used
HLSLIntrinsic intrinsic;
m_intrinsicSet.calcIntrinsic(op, inst->getDataType(), args, argCount, intrinsic);
HLSLIntrinsic* specOp = m_intrinsicSet.add(intrinsic);
-
+
emitCall(specOp, inst, args, int(argCount), inOuterPrec);
return;
}
}
-
- m_writer->emit(name);
- m_writer->emit("(");
- for (Index i = 0; i < argCount; ++i)
- {
- if (i != 0)
- {
- m_writer->emit(", ");
- }
- emitOperand(args[i].get(), getInfo(EmitOp::General));
- }
- m_writer->emit(")");
- maybeCloseParens(needClose);
+
+ // Use default impl (which will do intrinsic special macro expansion as necessary)
+ return Super::emitIntrinsicCallExprImpl(inst, targetIntrinsic, inOuterPrec);
}
bool CPPSourceEmitter::_tryEmitInstExprAsIntrinsic(IRInst* inst, const EmitOpInfo& inOuterPrec)
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 93508813b..26d6eada0 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -89,6 +89,9 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy
return SLANG_FAIL;
}
+ outName << "CUtexObject";
+
+#if 0
outName << "texture<";
outName << _getTypeName(texType->getElementType());
outName << ", ";
@@ -124,6 +127,7 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy
}
outName << ">";
+#endif
return SLANG_OK;
}
@@ -312,6 +316,13 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target,
}
}
+ switch (type->op)
+ {
+ case kIROp_SamplerStateType: out << "SamplerState"; return SLANG_OK;
+ case kIROp_SamplerComparisonStateType: out << "SamplerComparisonState"; return SLANG_OK;
+ default: break;
+ }
+
break;
}
}
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 644f54a95..cf793b52d 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -735,25 +735,30 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
{
typedef CPUObjectLayoutRulesImpl Super;
+ // cuda.h defines a variety of handle types. We don't want to have to include cuda.h though - as it may not be available
+ // on a build target. So for now we define this handle type, that matches cuda.h and is used for types that use this kind
+ // of opaque handle (as opposed to a pointer) such as CUsurfObject, CUtexObject
+ typedef unsigned long long ObjectHandle;
+
virtual SimpleLayoutInfo GetObjectLayout(ShaderParameterKind kind) override
{
switch (kind)
{
case ShaderParameterKind::ConstantBuffer:
// It's a pointer to the actual uniform data
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
case ShaderParameterKind::MutableTexture:
case ShaderParameterKind::TextureUniformBuffer:
case ShaderParameterKind::Texture:
// It's a pointer to a texture interface
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(ObjectHandle), SLANG_ALIGN_OF(ObjectHandle));
case ShaderParameterKind::StructuredBuffer:
case ShaderParameterKind::MutableStructuredBuffer:
// TODO(JS): We are just storing as a pointer for now
// It's a ptr and a size of the amount of elements
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
case ShaderParameterKind::RawBuffer:
case ShaderParameterKind::Buffer:
@@ -763,11 +768,16 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
// TODO(JS): We are storing as a pointer for now
// It's a pointer and a size in bytes
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
case ShaderParameterKind::SamplerState:
- // It's a pointer
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+ // In CUDA it seems that sampler states are combined into texture objects.
+ // So it's a binding issue to combine a sampler with a texture - and samplers are ignored
+ // For simplicity here though - we do create a variable and that variable takes up
+ // uniform binding space.
+ // TODO(JS): If we wanted to remove these variables we'd want to do it as a pass. The pass
+ // would presumably have to remove use of variables of this kind throughout IR.
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
case ShaderParameterKind::TextureSampler:
case ShaderParameterKind::MutableTextureSampler:
diff --git a/tests/cuda/cuda-texture.slang b/tests/cuda/cuda-texture.slang
new file mode 100644
index 000000000..3b5bae8ee
--- /dev/null
+++ b/tests/cuda/cuda-texture.slang
@@ -0,0 +1,22 @@
+//TEST(compute):COMPARE_COMPUTE:-cpu -compute
+//TEST(compute):COMPARE_COMPUTE:-cuda -compute
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer : register(u0);
+
+//TEST_INPUT: Texture2D(size=4, content=one):name texture
+Texture2D<float> texture;
+
+//TEST_INPUT: Sampler:name sampler
+SamplerState sampler;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Each thread samples `texture` once and writes the result to outputBuffer
+{
+ int tid = int(dispatchThreadID.x);
+ float u = tid * (1.0f / 3.0f); // u steps by 1/3 per thread (0, 1/3, 2/3, 1 for tid 0..3)
+ float v = 1.0f - u; // v runs in the opposite direction to u
+ float2 uv = float2(u, v);
+
+ outputBuffer[tid] = texture.Sample(sampler, uv); // The expected file lists 3F800000 (1.0f) for all four entries; the texture is created with content=one
+}
diff --git a/tests/cuda/cuda-texture.slang.expected.txt b/tests/cuda/cuda-texture.slang.expected.txt
new file mode 100644
index 000000000..cc5e55ab6
--- /dev/null
+++ b/tests/cuda/cuda-texture.slang.expected.txt
@@ -0,0 +1,4 @@
+3F800000
+3F800000
+3F800000
+3F800000
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index e42a0a53e..a21747554 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -58,6 +58,52 @@ public:
void* m_cudaMemory;
};
+class CUDATextureResource : public RefObject // Holds a CUDA texture object plus the device memory and array handles passed in; releases all three on destruction
+{
+public:
+ typedef RefObject Super;
+
+ CUDATextureResource(CUtexObject cudaTexObj, CUdeviceptr cudaMemory, CUarray cudaArray): // Stores the three handles; the destructor below releases them
+ m_cudaTexObj(cudaTexObj),
+ m_cudaMemory(cudaMemory),
+ m_cudaArray(cudaArray)
+ {
+ }
+ ~CUDATextureResource() // Each handle is only released if it is non-zero
+ {
+ if (m_cudaTexObj)
+ {
+ SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj)); // The texture object is destroyed before the array it was created from
+ }
+ if (m_cudaMemory)
+ {
+ SLANG_CUDA_ASSERT_ON_FAIL(cuMemFree(m_cudaMemory));
+ }
+ if (m_cudaArray)
+ {
+ SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray));
+ }
+ }
+
+ static CUtexObject getCUDATexObject(BindSet::Value* value) // Returns the texture object held by value's target, or 0 if value is null or its target is not a CUDATextureResource
+ {
+ if (value)
+ {
+ auto resource = dynamic_cast<CUDATextureResource*>(value->m_target.Ptr());
+ // NOTE: 0 is assumed to be an acceptable 'null' texture object handle. This is an assumption that appears to work in practice.
+ return resource ? resource->m_cudaTexObj : CUtexObject(0);
+ }
+
+ return CUtexObject(0);
+ }
+
+protected:
+ // CUtexObject is an opaque handle type (the CUDA prelude typedefs it as unsigned long long)
+ CUtexObject m_cudaTexObj = CUtexObject();
+ CUdeviceptr m_cudaMemory = CUdeviceptr();
+ CUarray m_cudaArray = CUarray();
+};
+
class ScopeCUDAModule
{
public:
@@ -381,10 +427,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
SLANG_ASSERT(value->m_userIndex >= 0);
auto& srcEntry = entries[value->m_userIndex];
- // TODO(JS):
- // We should use the srcEntry to determine what data to store in the texture,
- // it's dimensions etc. For now we just support it being 1.
-
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType();
int count = 1;
@@ -393,9 +435,90 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
count = int(typeReflection->getElementCount());
}
- // TODO(JS): Should use the input setup to work how to create this texture
- // Store the target specific value
- //value->m_target = _newOneTexture2D(count);
+ const auto& textureDesc = srcEntry.textureDesc;
+
+ int width = textureDesc.size;
+ int height = textureDesc.size;
+
+ TextureData texData;
+ generateTextureData(texData, textureDesc);
+
+ size_t elementSize = 0;
+
+ CUarray cudaArray;
+ {
+ CUDA_ARRAY_DESCRIPTOR arrayDesc;
+ arrayDesc.Width = width;
+ arrayDesc.Height = height;
+
+ switch (textureDesc.format)
+ {
+ case Format::R_Float32:
+ {
+ arrayDesc.Format = CU_AD_FORMAT_FLOAT;
+ arrayDesc.NumChannels = 1;
+ elementSize = sizeof(float);
+ break;
+ }
+ case Format::RGBA_Unorm_UInt8:
+ {
+ arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
+ arrayDesc.NumChannels = 4;
+ elementSize = sizeof(uint32_t);
+ break;
+ }
+ default:
+ {
+ SLANG_ASSERT(!"Only support R_Float32/RGBA_Unorm_UInt8 formats for now");
+ return SLANG_FAIL;
+ }
+ }
+
+ // Allocate the array
+ SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&cudaArray, &arrayDesc));
+ }
+
+ CUdeviceptr cudaMemory = (CUdeviceptr)nullptr;
+ {
+ const size_t size = width * height * elementSize;
+ // allocate device memory for result
+ SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&cudaMemory, size));
+ }
+
+ {
+ CUDA_MEMCPY2D copyParam;
+ memset(&copyParam, 0, sizeof(copyParam));
+ copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ copyParam.dstArray = cudaArray;
+ copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
+ copyParam.srcHost = texData.dataBuffer[0].getBuffer();
+ copyParam.srcPitch = width * elementSize;
+ copyParam.WidthInBytes = copyParam.srcPitch;
+ copyParam.Height = height;
+ SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
+ }
+
+ // set texture parameters
+
+ CUtexObject cudaTexObj;
+ {
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC));
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = cudaArray;
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC));
+ texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP;
+ texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&cudaTexObj, &resDesc, &texDesc, nullptr));
+ }
+
+ value->m_target = new CUDATextureResource(cudaTexObj, cudaMemory, cudaArray);
break;
}
case SLANG_TEXTURE_1D:
@@ -483,6 +606,15 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
*location.getUniform<void*>() = CUDAResource::getCUDAData(value);
break;
}
+ case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_2D:
+ case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_CUBE:
+ {
+ *location.getUniform<CUtexObject>() = CUDATextureResource::getCUDATexObject(value);
+ break;
+ }
+
}
break;
}