From 8ee39e08c48a315163fe1850dbb12ca292020d4d Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 18 Feb 2020 14:14:16 -0500 Subject: First pass Texture Array support on CUDA/CPU (#1225) * Add cubemap support. * Add CUDA fence intrinsics. * Added Gather for CUDA. * Use the CUDA driver API as much as possible. * * Support 1D texture on CPU * WIP on 1D texture on CUDA * Added simplified texture test * Fix test. * Improve texture-simple tests. * * Add CPU support for 3d textures * Add support for mip maps to CUDA * Disable warnings in nvrtc * Update CUDA docs * WIP on 3d texture support. * Add support for 3d textures for CPU and CUDA. * CPU and CUDA support for cube maps. * Add CPU support for Texture1DArray. * Support CUDA Layered/Array type in meta library. --- source/slang/core.meta.slang | 79 ++++++++++++++++++++++++++++++--------- source/slang/core.meta.slang.h | 81 +++++++++++++++++++++++++++++++--------- source/slang/slang-emit-cuda.cpp | 44 ++++------------------ 3 files changed, 133 insertions(+), 71 deletions(-) (limited to 'source') diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 450cc4512..ec1a3ed0b 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -894,22 +894,47 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n"; - if( baseShape != TextureFlavor::Shape::ShapeCube ) + // CUDA { - sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0"; - for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) { - sb << ", ($2)"; - if (kBaseTextureTypes[tt].coordCount > 1) + sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; + if (isArray) { - sb << '.' 
<< char(i + 'x'); + sb << "Layered"; } + sb << "<$T0>($0"; + for (int i = 0; i < coordCount; ++i) + { + sb << ", ($2)"; + if (vecCount > 1) + { + sb << '.' << char(i + 'x'); + } + } + if (isArray) + { + sb << ", int(($2)." << char(coordCount + 'x') << ")"; + } + sb << ")\")\n"; + } + else + { + sb << "__target_intrinsic(cuda, \"texCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "<$T0>($0, ($2).x, ($2).y, ($2).z"; + if (isArray) + { + sb << ", int(($2).w)"; + } + sb << ")\")\n"; } - sb << ")\")\n"; - } - else - { - sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n"; } sb << "T Sample(SamplerState s, "; @@ -939,7 +964,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << "float clamp, out uint status);\n"; - // `SampleBias()` sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n"; sb << "T SampleBias(SamplerState s, "; @@ -1054,24 +1078,45 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n"; // CUDA - if (!isArray) { + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + if( baseShape != TextureFlavor::Shape::ShapeCube ) { - sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0"; - for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; + if (isArray) + { + sb << "Layered"; + } + sb << "Lod<$T0>($0"; + for (int i = 0; i < coordCount; ++i) { sb << ", ($2)"; - if (kBaseTextureTypes[tt].coordCount > 1) + if (vecCount > 1) { sb << '.' << char(i + 'x'); } } + if (isArray) + { + sb << ", int(($2)." 
<< char(coordCount + 'x') << ")"; + } sb << ", $3)\")\n"; } else { - sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n"; + sb << "__target_intrinsic(cuda, \"texCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z"; + if (isArray) + { + sb << ", int(($2).w)"; + } + sb << ", $3)\")\n"; } } diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h index cca8f2e51..a8ad43965 100644 --- a/source/slang/core.meta.slang.h +++ b/source/slang/core.meta.slang.h @@ -915,22 +915,47 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n"; - if( baseShape != TextureFlavor::Shape::ShapeCube ) + // CUDA { - sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0"; - for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) { - sb << ", ($2)"; - if (kBaseTextureTypes[tt].coordCount > 1) + sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; + if (isArray) { - sb << '.' << char(i + 'x'); + sb << "Layered"; } + sb << "<$T0>($0"; + for (int i = 0; i < coordCount; ++i) + { + sb << ", ($2)"; + if (vecCount > 1) + { + sb << '.' << char(i + 'x'); + } + } + if (isArray) + { + sb << ", int(($2)." 
<< char(coordCount + 'x') << ")"; + } + sb << ")\")\n"; + } + else + { + sb << "__target_intrinsic(cuda, \"texCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "<$T0>($0, ($2).x, ($2).y, ($2).z"; + if (isArray) + { + sb << ", int(($2).w)"; + } + sb << ")\")\n"; } - sb << ")\")\n"; - } - else - { - sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n"; } sb << "T Sample(SamplerState s, "; @@ -960,7 +985,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << "float clamp, out uint status);\n"; - // `SampleBias()` sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n"; sb << "T SampleBias(SamplerState s, "; @@ -1075,24 +1099,45 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n"; // CUDA - if (!isArray) { + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + if( baseShape != TextureFlavor::Shape::ShapeCube ) { - sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0"; - for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i) + sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D"; + if (isArray) + { + sb << "Layered"; + } + sb << "Lod<$T0>($0"; + for (int i = 0; i < coordCount; ++i) { sb << ", ($2)"; - if (kBaseTextureTypes[tt].coordCount > 1) + if (vecCount > 1) { sb << '.' << char(i + 'x'); } } + if (isArray) + { + sb << ", int(($2)." 
<< char(coordCount + 'x') << ")"; + } sb << ", $3)\")\n"; } else { - sb << "__target_intrinsic(cuda, \"texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)\")\n"; + sb << "__target_intrinsic(cuda, \"texCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z"; + if (isArray) + { + sb << ", int(($2).w)"; + } + sb << ", $3)\")\n"; } } @@ -1314,7 +1359,7 @@ for (auto op : binaryOps) sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n"; } } -SLANG_RAW("#line 1296 \"core.meta.slang\"") +SLANG_RAW("#line 1341 \"core.meta.slang\"") SLANG_RAW("\n") SLANG_RAW("\n") SLANG_RAW("// Specialized function\n") diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 0bbaafa5b..262a67784 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -81,57 +81,29 @@ static bool _isSingleNameBasicType(IROp op) SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName) { - // texture texRef; - // Not clear how to do this yet - if (texType->isMultisample() || texType->isArray()) + if (texType->isMultisample()) { return SLANG_FAIL; } - outName << "CUtexObject"; - -#if 0 - outName << "texture<"; - outName << _getTypeName(texType->getElementType()); - outName << ", "; - - switch (texType->GetBaseShape()) - { - case TextureFlavor::Shape::Shape1D: outName << "cudaTextureType1D"; break; - case TextureFlavor::Shape::Shape2D: outName << "cudaTextureType2D"; break; - case TextureFlavor::Shape::Shape3D: outName << "cudaTextureType3D"; break; - case TextureFlavor::Shape::ShapeCube: outName << "cudaTextureTypeCubemap"; break; - case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break; - default: - SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape"); - return SLANG_FAIL; - } - - outName << 
", "; - switch (texType->getAccess()) { case SLANG_RESOURCE_ACCESS_READ: { - // Other value is cudaReadModeNormalizedFloat - - outName << "cudaReadModeElementType"; - break; + outName << "CUtexObject"; + return SLANG_OK; } - default: + case SLANG_RESOURCE_ACCESS_READ_WRITE: { - SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode"); - return SLANG_FAIL; + outName << "CUsurfObject"; + return SLANG_OK; } + default: break; } - - outName << ">"; -#endif - return SLANG_OK; + return SLANG_FAIL; } - SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) { typedef HLSLIntrinsic::Op Op; -- cgit v1.2.3