From 1f401d04e32c6feaeb35243ea5bfc2b14520344b Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Thu, 20 Feb 2020 18:24:00 -0500 Subject: WIP on RWTexture types on CUDA/CPU (#1234) * CUDA support for array of resources. * * Add support for Texture2DArray on CPU * Expand texture-simple.slang to test Texture2DArray * Reorganise CUDAComputeUtil to split out createTextureResource. * Add TextureCubeArray support for CPU/CUDA targets. * Pulled out CUDAResource Renamed derived classes to reflect that change. * Creation of SurfObject type. * Functions to return read/write access for simplifying future additions. * WIP for RWTexture access on CPU/CUDA. * CUsurfObject cannot have mips. * Ability to set number of mips on test data. Preliminary support for CUsurfObj and RWTexture1D on CUDA. CUDA docs improvements. * Fix typo. --- source/slang/core.meta.slang | 65 ++++++++++++++++++++++++++++++++++++++++ source/slang/core.meta.slang.h | 67 +++++++++++++++++++++++++++++++++++++++++- source/slang/hlsl.meta.slang | 2 +- source/slang/hlsl.meta.slang.h | 2 +- 4 files changed, 133 insertions(+), 3 deletions(-) (limited to 'source') diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index ec1a3ed0b..722629034 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -777,6 +777,67 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << ")$z\")\n"; } + + // CUDA + if (isMultisample) + { + } + else + { + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) + { + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D"; + if (isArray) + { + sb << "Layered"; + } + sb << "read"; + sb << "<$T0>($0"; + for (int i = 0; i < coordCount; ++i) + { + sb << ", ($1)"; + if (vecCount > 1) + { + sb << '.' << char(i + 'x'); + } + } + if (isArray) + { + sb << ", int(($1)." << char(coordCount + 'x') << ")"; + } + sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; + } + else + { + sb << "__target_intrinsic(cuda, \"surfCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "read"; + sb << "<$T0>($0, ($1).x, ($1).y, ($1).z"; + if (isArray) + { + sb << ", int(($1).w)"; + } + sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; + } + } + else if (access == SLANG_RESOURCE_ACCESS_READ) + { + // We can allow this on Texture1D + if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) + { + sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n"; + } + } + } + sb << "T Load("; sb << "int" << loadCoordCount << " location"; if(isMultisample) @@ -785,6 +846,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << ");\n"; + // GLSL if (isMultisample) { sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; @@ -804,6 +866,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << ", $2)$z\")\n"; } + + + sb << "T Load("; sb << "int" << loadCoordCount << " location"; if(isMultisample) diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h index a8ad43965..ba960b1d1 100644 --- a/source/slang/core.meta.slang.h +++ b/source/slang/core.meta.slang.h @@ -798,6 +798,67 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) sb << ")$z\")\n"; } + + // CUDA + if (isMultisample) + { + } + else + { + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) + { + const int coordCount = kBaseTextureTypes[tt].coordCount; + const int vecCount = coordCount + int(isArray); + + if( baseShape != TextureFlavor::Shape::ShapeCube ) + { + sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D"; + if (isArray) + { + sb << "Layered"; + } + sb << "read"; + sb << "<$T0>($0"; + for (int i = 0; i < coordCount; ++i) + { + sb << ", ($1)"; + if (vecCount > 1) + { + sb << '.' << char(i + 'x'); + } + } + if (isArray) + { + sb << ", int(($1)." << char(coordCount + 'x') << ")"; + } + sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; + } + else + { + sb << "__target_intrinsic(cuda, \"surfCubemap"; + if (isArray) + { + sb << "Layered"; + } + sb << "read"; + sb << "<$T0>($0, ($1).x, ($1).y, ($1).z"; + if (isArray) + { + sb << ", int(($1).w)"; + } + sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n"; + } + } + else if (access == SLANG_RESOURCE_ACCESS_READ) + { + // We can allow this on Texture1D + if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false) + { + sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n"; + } + } + } + sb << "T Load("; sb << "int" << loadCoordCount << " location"; if(isMultisample) @@ -806,6 +867,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << ");\n"; + // GLSL if (isMultisample) { sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; @@ -825,6 +887,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) } sb << ", $2)$z\")\n"; } + + + sb << "T Load("; sb << "int" << loadCoordCount << " location"; if(isMultisample) @@ -1359,7 +1424,7 @@ for (auto op : binaryOps) sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n"; } } -SLANG_RAW("#line 1341 \"core.meta.slang\"") +SLANG_RAW("#line 1406 \"core.meta.slang\"") SLANG_RAW("\n") SLANG_RAW("\n") SLANG_RAW("// Specialized function\n") diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 988c6f69c..c3339cbb5 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1433,7 +1433,7 @@ __generic uint4 WaveMatch(matrix uint4 WaveMatc SLANG_RAW("\n") SLANG_RAW("// TODO(JS): For CUDA the article claims mask has to be used carefully\n") SLANG_RAW("// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/\n") -SLANG_RAW("// With the Warp intrinsics there is though mask, and it's just the 'active lanes'. So __activemask()\n") +SLANG_RAW("// With the Warp intrinsics there is no mask, and it's just the 'active lanes'. So __activemask()\n") SLANG_RAW("// seems to be appropriate.\n") SLANG_RAW("\n") SLANG_RAW("__target_intrinsic(cuda, \"(__all_sync(__activemask(), $0) != 0)\") \n") -- cgit v1.2.3