diff options
| author | ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> | 2025-08-07 00:22:22 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-07 07:22:22 +0000 |
| commit | 063cbeaaea2fb00a10c6058ea4a9632092772ea5 (patch) | |
| tree | b4412347d6c264c3b1a84ec971921a5e2fe76134 /prelude | |
| parent | 9e2685853033f4286feaf22d04a755a7395d95ce (diff) | |
Initial copy elision pass (#8042)
Fixes #7574
Changes:
* Add an initial (fairly simple) optimization pass which is able to
eliminate redundant copies.
* Our existing optimizer passes remove redundant load/store very robustly,
so this pass focuses on other cases of copy elimination.
* The primary approach is to turn every function parameter that is `in T`,
where `T` is trivial to copy, into a `__constref T`. Depending on the
scenario, we then manually insert a variable+load if pass-by-reference is
not possible; otherwise we pass by `constref`.
* Add optimizations to eliminate redundant code that causes `constref` to
fail to compile.
---------
Co-authored-by: Harsh Aggarwal <haaggarwal@nvidia.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: slangbot <ellieh+slangbot@nvidia.com>
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cpp-types.h | 14 | ||||
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 39 |
2 files changed, 29 insertions, 24 deletions
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h index 010ab8d6c..491438c80 100644 --- a/prelude/slang-cpp-types.h +++ b/prelude/slang-cpp-types.h @@ -440,7 +440,7 @@ struct Texture1D texture->Sample(samplerState, &loc, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, float loc, float level) + T SampleLevel(SamplerState samplerState, float loc, float level) const { T out; texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out)); @@ -500,7 +500,7 @@ struct Texture2D texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, const float2& loc, float level) + T SampleLevel(SamplerState samplerState, const float2& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); @@ -566,7 +566,7 @@ struct Texture3D texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, const float3& loc, float level) + T SampleLevel(SamplerState samplerState, const float3& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); @@ -620,7 +620,7 @@ struct TextureCube texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, const float3& loc, float level) + T SampleLevel(SamplerState samplerState, const float3& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); @@ -680,7 +680,7 @@ struct Texture1DArray texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, const float2& loc, float level) + T SampleLevel(SamplerState samplerState, const float2& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); @@ -747,7 +747,7 @@ struct Texture2DArray texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T 
SampleLevel(SamplerState samplerState, const float3& loc, float level) + T SampleLevel(SamplerState samplerState, const float3& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); @@ -808,7 +808,7 @@ struct TextureCubeArray texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } - T SampleLevel(SamplerState samplerState, const float4& loc, float level) + T SampleLevel(SamplerState samplerState, const float4& loc, float level) const { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 178c12f5f..a66fa15cb 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -348,22 +348,22 @@ SLANG_VECTOR_GET_ELEMENT(ulonglong) SLANG_VECTOR_GET_ELEMENT(float) SLANG_VECTOR_GET_ELEMENT(double) -#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \ - SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1 * x, int index) \ - { \ - return ((T*)(x)) + index; \ - } \ - SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2 * x, int index) \ - { \ - return ((T*)(x)) + index; \ - } \ - SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3 * x, int index) \ - { \ - return ((T*)(x)) + index; \ - } \ - SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4 * x, int index) \ - { \ - return ((T*)(x)) + index; \ +#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(const T##1 * x, int index) \ + { \ + return ((T*)(x)) + index; \ + } \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(const T##2 * x, int index) \ + { \ + return ((T*)(x)) + index; \ + } \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(const T##3 * x, int index) \ + { \ + return ((T*)(x)) + index; \ + } \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* 
_slang_vector_get_element_ptr(const T##4 * x, int index) \ + { \ + return ((T*)(x)) + index; \ } SLANG_VECTOR_GET_ELEMENT_PTR(int) SLANG_VECTOR_GET_ELEMENT_PTR(bool) @@ -689,6 +689,11 @@ struct Matrix { return rows[index]; } + + SLANG_FORCE_INLINE SLANG_CUDA_CALL const Vector<T, COLS>& operator[](size_t index) const + { + return rows[index]; + } }; @@ -2312,7 +2317,7 @@ struct StructuredBuffer } #ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT - SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) + SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) const { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); |
