From 1f401d04e32c6feaeb35243ea5bfc2b14520344b Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Thu, 20 Feb 2020 18:24:00 -0500 Subject: WIP on RWTexture types on CUDA/CPU (#1234) * CUDA support for array of resources. * * Add support for Texture2DArray on CPU * Expand texture-simple.slang to test Texture2DArray * Reorganise CUDAComputeUtil to split out createTextureResource. * Add TextureCubeArray support for CPU/CUDA targets. * Pulled out CUDAResource Renamed derived classes to reflect that change. * Creation of SurfObject type. * Functions to return read/write access for simplifying future additions. * WIP for RWTexture access on CPU/CUDA. * CUsurfObject cannot have mips. * Ability to set number of mips on test data. Preliminary support for CUsurfObj and RWTexture1D on CUDA. CUDA docs improvements. * Fix typo. --- prelude/slang-cpp-types.h | 15 +++++++++++++++ prelude/slang-cuda-prelude.h | 10 ++++++++++ 2 files changed, 25 insertions(+) (limited to 'prelude') diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h index f62333f68..563b4b6e9 100644 --- a/prelude/slang-cpp-types.h +++ b/prelude/slang-cpp-types.h @@ -343,6 +343,21 @@ struct TextureCubeArray ITextureCubeArray* texture; }; +/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + +struct IRWTexture1D +{ + virtual void Load(int32_t loc, void* out) = 0; +}; + +template <typename T> +struct RWTexture1D +{ + T Load(int32_t loc) const { T out; texture->Load(loc, &out); return out; } + + IRWTexture1D* texture; +}; + /* Varying input for Compute */ /* Used when running a single thread */ diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index b81acba1e..1938e3dc1 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -38,6 +38,16 @@ // Here we don't have the index zeroing behavior, as such bounds checks are generally not on GPU targets either. 
#ifndef SLANG_CUDA_FIXED_ARRAY_BOUND_CHECK # define SLANG_CUDA_FIXED_ARRAY_BOUND_CHECK(index, count) SLANG_PRELUDE_ASSERT(index < count); +#endif + + // This macro controls how out-of-range surface coordinates are handled; + // it can equal + // cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range + // cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are ignored + // cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail. + +#ifndef SLANG_CUDA_BOUNDARY_MODE +# define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero #endif template -- cgit v1.2.3