diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-01-24 15:06:08 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-01-24 15:06:08 -0500 |
| commit | b8f294445b998eadb9b09e2b91eb462b881eaf2e (patch) | |
| tree | 8607e5d2f6c2c2b4b7545a721d6d58e6e557e5c0 | |
| parent | 394983d61efa2bf99ba96aa68a47df8927a8a634 (diff) | |
Texture Sample available in CUDA (#1176)
* WIP: Trying to figure out how texturing will work with CUDA.
* WIP: Fixes for CUDA layout. Initial CUDA texture test.
* WIP: Outputs something compilable by CUDA for TextureND.Sample
* 2d texture working with CUDA.
* Fix how binding for SamplerState occurs in CUDA.
* Small tidy up of comments.
| -rw-r--r-- | docs/cuda-target.md | 4 | ||||
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 11 | ||||
| -rw-r--r-- | source/core/slang-platform.cpp | 15 | ||||
| -rw-r--r-- | source/core/slang-platform.h | 3 | ||||
| -rw-r--r-- | source/slang/core.meta.slang | 19 | ||||
| -rw-r--r-- | source/slang/core.meta.slang.h | 21 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 35 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 11 | ||||
| -rw-r--r-- | source/slang/slang-type-layout.cpp | 22 | ||||
| -rw-r--r-- | tests/cuda/cuda-texture.slang | 22 | ||||
| -rw-r--r-- | tests/cuda/cuda-texture.slang.expected.txt | 4 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 146 |
13 files changed, 271 insertions, 46 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md index db8c98f14..41fc98790 100644 --- a/docs/cuda-target.md +++ b/docs/cuda-target.md @@ -14,7 +14,7 @@ Slang has preliminary support for producing CUDA source, and PTX binaries using These limitations apply to Slang transpiling to CUDA. -* Only supports the 'texture object' style binding +* Only supports the 'texture object' style binding (The texture object API is only supported on devices of compute capability 3.0 or higher. ) * Samplers are not separate objects in CUDA - they are combined into a single 'TextureObject'. So samplers are effectively ignored on CUDA targets. * Whilst there is tex1Dfetch there are no equivalents for higher dimensions - so such accesses are not currently supported @@ -68,7 +68,7 @@ struct UniformEntryPointParams struct UniformState { CUtexObject tex; // This is the combination of a texture and a sampler(!) - //SamplerState sampler; // CUDA doesn't have separate sampler objects - so this is just ignored. + SamplerState sampler; // This variable exists within the layout, but it's value is not used. int32_t* outputBuffer; // Currently Structured buffers are converted to pointers - this will likely change in the future (for bounds checking and other reasons) Thing* thing3; // Constant buffers map to pointers }; diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index f78814486..28e423b31 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -19,6 +19,17 @@ struct FixedArray T m_data[SIZE]; }; +// Typically defined in cuda.h, but we can't ship/rely on that, so just define here +typedef unsigned long long CUtexObject; +typedef unsigned long long CUsurfObject; + +// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type, +// backed as a pointer, to simplify code generation, with the downside that such a binding will take up +// uniform space, even though it will have no effect. 
+// TODO(JS): Consider ways to strip use of variables of this type so have no binding, +struct SamplerStateUnused; +typedef SamplerStateUnused* SamplerState; + // Code generator will generate the specific type template <typename T, int ROWS, int COLS> struct Matrix; diff --git a/source/core/slang-platform.cpp b/source/core/slang-platform.cpp index e735216f4..d02951e0b 100644 --- a/source/core/slang-platform.cpp +++ b/source/core/slang-platform.cpp @@ -1,4 +1,7 @@ // slang-platform.cpp + +#define _CRT_SECURE_NO_WARNINGS + #include "slang-platform.h" #include "slang-common.h" @@ -197,6 +200,18 @@ SLANG_COMPILE_TIME_ASSERT(E_OUTOFMEMORY == SLANG_E_OUT_OF_MEMORY); #endif // _WIN32 + +/* static */SlangResult PlatformUtil::getEnvironmentVariable(const UnownedStringSlice& name, StringBuilder& out) +{ + const char* value = getenv(String(name).getBuffer()); + if (value) + { + out.append(value); + return SLANG_OK; + } + return SLANG_E_NOT_FOUND; +} + /* static */PlatformKind PlatformUtil::getPlatformKind() { #if SLANG_WINRT diff --git a/source/core/slang-platform.h b/source/core/slang-platform.h index c3ad1c486..767e83c1d 100644 --- a/source/core/slang-platform.h +++ b/source/core/slang-platform.h @@ -128,6 +128,9 @@ namespace Slang /// True if the kind is part of the family static bool isFamily(PlatformFamily family, PlatformKind kind) { return (getPlatformFlags(family) & (PlatformFlags(1) << int(kind))) != 0; } + /// Given an environment name returns the set system variable. 
+ /// Will return SLANG_E_NOT_FOUND if the variable is not set + static SlangResult getEnvironmentVariable(const UnownedStringSlice& name, StringBuilder& out); }; #ifndef _MSC_VER diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 80ff09ea8..58e6e287c 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -770,6 +770,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "$1"; } sb << ")$z\")\n"; + } sb << "T Load("; sb << "int" << loadCoordCount << " location"; @@ -887,6 +888,24 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) // `Sample()` sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n"; + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0"; + if (kBaseTextureTypes[tt].coordCount == 1) + { + sb << ", $2"; + } + else + { + for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + { + sb << ", ($2)." << char(i + 'x'); + } + } + sb << ")\")\n"; + } + sb << "T Sample(SamplerState s, "; sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n"; diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h index 27811b588..201429700 100644 --- a/source/slang/core.meta.slang.h +++ b/source/slang/core.meta.slang.h @@ -791,6 +791,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "$1"; } sb << ")$z\")\n"; + } sb << "T Load("; sb << "int" << loadCoordCount << " location"; @@ -908,6 +909,24 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) // `Sample()` sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n"; + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0"; + if (kBaseTextureTypes[tt].coordCount == 1) + { + sb << ", $2"; + } + else + { + for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + { + sb << ", ($2)." 
<< char(i + 'x'); + } + } + sb << ")\")\n"; + } + sb << "T Sample(SamplerState s, "; sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n"; @@ -1258,7 +1277,7 @@ for (auto op : binaryOps) sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n"; } } -SLANG_RAW("#line 1240 \"core.meta.slang\"") +SLANG_RAW("#line 1259 \"core.meta.slang\"") SLANG_RAW("\n") SLANG_RAW("\n") SLANG_RAW("// Specialized function\n") diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 10790567e..8e0c64d1a 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -1250,6 +1250,10 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl( SLANG_RELEASE_ASSERT(argCount > argIndex); IRType* type = args[argIndex].get()->getDataType(); + if (auto baseTextureType = as<IRTextureType>(type)) + { + type = baseTextureType->getElementType(); + } IRBasicType* underlyingType = nullptr; if (auto basicType = as<IRBasicType>(type)) diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 5ea3c72a2..759f980d3 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1912,48 +1912,23 @@ void CPPSourceEmitter::emitIntrinsicCallExprImpl( return; } - auto prec = getInfo(EmitOp::Postfix); - needClose = maybeEmitParens(outerPrec, prec); - - if (name[0] == '.') - { - // Looks like a member function call - emitOperand(args[0].get(), leftSide(outerPrec, prec)); - m_writer->emit("."); - - name = UnownedStringSlice(name.begin() + 1, name.end()); - - args++; - argCount--; - } - else { Op op = m_opLookup->getOpByName(name); if (op != Op::Invalid) { - + // Work out the intrinsic used HLSLIntrinsic intrinsic; m_intrinsicSet.calcIntrinsic(op, inst->getDataType(), args, argCount, intrinsic); HLSLIntrinsic* specOp = 
m_intrinsicSet.add(intrinsic); - + emitCall(specOp, inst, args, int(argCount), inOuterPrec); return; } } - - m_writer->emit(name); - m_writer->emit("("); - for (Index i = 0; i < argCount; ++i) - { - if (i != 0) - { - m_writer->emit(", "); - } - emitOperand(args[i].get(), getInfo(EmitOp::General)); - } - m_writer->emit(")"); - maybeCloseParens(needClose); + + // Use default impl (which will do intrinsic special macro expansion as necessary) + return Super::emitIntrinsicCallExprImpl(inst, targetIntrinsic, inOuterPrec); } bool CPPSourceEmitter::_tryEmitInstExprAsIntrinsic(IRInst* inst, const EmitOpInfo& inOuterPrec) diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 93508813b..26d6eada0 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -89,6 +89,9 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy return SLANG_FAIL; } + outName << "CUtexObject"; + +#if 0 outName << "texture<"; outName << _getTypeName(texType->getElementType()); outName << ", "; @@ -124,6 +127,7 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy } outName << ">"; +#endif return SLANG_OK; } @@ -312,6 +316,13 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, } } + switch (type->op) + { + case kIROp_SamplerStateType: out << "SamplerState"; return SLANG_OK; + case kIROp_SamplerComparisonStateType: out << "SamplerComparisonState"; return SLANG_OK; + default: break; + } + break; } } diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 644f54a95..cf793b52d 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -735,25 +735,30 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl { typedef CPUObjectLayoutRulesImpl Super; + // cuda.h defines a variety of handle types. 
We don't want to have to include cuda.h though - as it may not be available + // on a build target. So for we define this handle type, that matches cuda.h and is used for types that use this kind + // of opaque handle (as opposed to a pointer) such as CUsurfObject, CUtexObject + typedef unsigned long long ObjectHandle; + virtual SimpleLayoutInfo GetObjectLayout(ShaderParameterKind kind) override { switch (kind) { case ShaderParameterKind::ConstantBuffer: // It's a pointer to the actual uniform data - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); case ShaderParameterKind::MutableTexture: case ShaderParameterKind::TextureUniformBuffer: case ShaderParameterKind::Texture: // It's a pointer to a texture interface - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(ObjectHandle), SLANG_ALIGN_OF(ObjectHandle)); case ShaderParameterKind::StructuredBuffer: case ShaderParameterKind::MutableStructuredBuffer: // TODO(JS): We are just storing as a pointer for now // It's a ptr and a size of the amount of elements - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); case ShaderParameterKind::RawBuffer: case ShaderParameterKind::Buffer: @@ -763,11 +768,16 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl // TODO(JS): We are storing as a pointer for now // It's a pointer and a size in bytes - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); case ShaderParameterKind::SamplerState: - // It's a pointer - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + // In 
CUDA it seems that sampler states are combined into texture objects. + // So it's a binding issue to combine a sampler with a texture - and sampler are ignored + // For simplicity here though - we do create a variable and that variable takes up + // uniform binding space. + // TODO(JS): If we wanted to remove these variables we'd want to do it as a pass. The pass + // would presumably have to remove use of variables of this kind throughout IR. + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); case ShaderParameterKind::TextureSampler: case ShaderParameterKind::MutableTextureSampler: diff --git a/tests/cuda/cuda-texture.slang b/tests/cuda/cuda-texture.slang new file mode 100644 index 000000000..3b5bae8ee --- /dev/null +++ b/tests/cuda/cuda-texture.slang @@ -0,0 +1,22 @@ +//TEST(compute):COMPARE_COMPUTE:-cpu -compute +//TEST(compute):COMPARE_COMPUTE:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer : register(u0); + +//TEST_INPUT: Texture2D(size=4, content=one):name texture +Texture2D<float> texture; + +//TEST_INPUT: Sampler:name sampler +SamplerState sampler; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int tid = int(dispatchThreadID.x); + float u = tid * (1.0f / 3.0f); + float v = 1.0f - u; + float2 uv = float2(u, v); + + outputBuffer[tid] = texture.Sample(sampler, uv); +} diff --git a/tests/cuda/cuda-texture.slang.expected.txt b/tests/cuda/cuda-texture.slang.expected.txt new file mode 100644 index 000000000..cc5e55ab6 --- /dev/null +++ b/tests/cuda/cuda-texture.slang.expected.txt @@ -0,0 +1,4 @@ +3F800000 +3F800000 +3F800000 +3F800000 diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index e42a0a53e..a21747554 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -58,6 +58,52 @@ public: 
void* m_cudaMemory; }; +class CUDATextureResource : public RefObject +{ +public: + typedef RefObject Super; + + CUDATextureResource(CUtexObject cudaTexObj, CUdeviceptr cudaMemory, CUarray cudaArray): + m_cudaTexObj(cudaTexObj), + m_cudaMemory(cudaMemory), + m_cudaArray(cudaArray) + { + } + ~CUDATextureResource() + { + if (m_cudaTexObj) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj)); + } + if (m_cudaMemory) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuMemFree(m_cudaMemory)); + } + if (m_cudaArray) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray)); + } + } + + static CUtexObject getCUDATexObject(BindSet::Value* value) + { + if (value) + { + auto resource = dynamic_cast<CUDATextureResource*>(value->m_target.Ptr()); + // It's an assumption here that 0 is okay for null. Seems to work... + return resource ? resource->m_cudaTexObj : CUtexObject(0); + } + + return CUtexObject(0); + } + +protected: + // This is an opaque type, that's backed by a long long + CUtexObject m_cudaTexObj = CUtexObject(); + CUdeviceptr m_cudaMemory = CUdeviceptr(); + CUarray m_cudaArray = CUarray(); +}; + class ScopeCUDAModule { public: @@ -381,10 +427,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp SLANG_ASSERT(value->m_userIndex >= 0); auto& srcEntry = entries[value->m_userIndex]; - // TODO(JS): - // We should use the srcEntry to determine what data to store in the texture, - // it's dimensions etc. For now we just support it being 1. 
- slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); int count = 1; @@ -393,9 +435,90 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp count = int(typeReflection->getElementCount()); } - // TODO(JS): Should use the input setup to work how to create this texture - // Store the target specific value - //value->m_target = _newOneTexture2D(count); + const auto& textureDesc = srcEntry.textureDesc; + + int width = textureDesc.size; + int height = textureDesc.size; + + TextureData texData; + generateTextureData(texData, textureDesc); + + size_t elementSize = 0; + + CUarray cudaArray; + { + CUDA_ARRAY_DESCRIPTOR arrayDesc; + arrayDesc.Width = width; + arrayDesc.Height = height; + + switch (textureDesc.format) + { + case Format::R_Float32: + { + arrayDesc.Format = CU_AD_FORMAT_FLOAT; + arrayDesc.NumChannels = 1; + elementSize = sizeof(float); + break; + } + case Format::RGBA_Unorm_UInt8: + { + arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; + arrayDesc.NumChannels = 4; + elementSize = sizeof(uint32_t); + break; + } + default: + { + SLANG_ASSERT(!"Only support R_Float32/RGBA_Unorm_UInt8 formats for now"); + return SLANG_FAIL; + } + } + + // Allocate the array + SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&cudaArray, &arrayDesc)); + } + + CUdeviceptr cudaMemory = (CUdeviceptr)nullptr; + { + const size_t size = width * height * elementSize; + // allocate device memory for result + SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&cudaMemory, size)); + } + + { + CUDA_MEMCPY2D copyParam; + memset(&copyParam, 0, sizeof(copyParam)); + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = cudaArray; + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = texData.dataBuffer[0].getBuffer(); + copyParam.srcPitch = width * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = height; + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam)); + } + + // set texture parameters + + CUtexObject 
cudaTexObj; + { + CUDA_RESOURCE_DESC resDesc; + memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); + resDesc.resType = CU_RESOURCE_TYPE_ARRAY; + resDesc.res.array.hArray = cudaArray; + + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); + texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&cudaTexObj, &resDesc, &texDesc, nullptr)); + } + + value->m_target = new CUDATextureResource(cudaTexObj, cudaMemory, cudaArray); break; } case SLANG_TEXTURE_1D: @@ -483,6 +606,15 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp *location.getUniform<void*>() = CUDAResource::getCUDAData(value); break; } + case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_2D: + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: + { + *location.getUniform<CUtexObject>() = CUDATextureResource::getCUDATexObject(value); + break; + } + } break; } |
