summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkaizhangNV <149626564+kaizhangNV@users.noreply.github.com>2024-10-28 11:44:36 -0500
committerGitHub <noreply@github.com>2024-10-28 09:44:36 -0700
commit0557a199d2eb205bf133c8fc111cce3a19336fde (patch)
treea934206812185b506aa61a5efb6e90930340e404
parent04329077988a2b1f7a87b1d116457599039e5e12 (diff)
Add documentation for buffer types (#5410)
* Add documentation for buffer types * address comments * Update doc for LoadxAligned functions Update the doc for all Load{2,3,4}Aligned and LoadxAligned<T> functions of buffer type. We assume that the aligned versions of Load{2,3,4} and Load<T> will treat the whole buffer as type of uint{2,3,4} or T, so the address must be aligned to the size of the loaded type. --------- Co-authored-by: Yong He <yonghe@outlook.com>
-rw-r--r--source/slang/hlsl.meta.slang345
1 files changed, 311 insertions, 34 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 39957f7ce..fb73496c9 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -93,8 +93,13 @@ __intrinsic_type($(kIROp_HLSLAppendStructuredBufferType))
struct AppendStructuredBuffer
{
__intrinsic_op($(kIROp_StructuredBufferAppend))
+ /// Appends a new element to the buffer.
+ ///@param value The element to be appended to the buffer.
void Append(T value);
+ /// Get information about the number of elements and stride of the buffer.
+ ///@param numStructs The number of elements in the buffer.
+ ///@param stride The stride of the buffer.
[ForceInline]
void GetDimensions(
out uint numStructs,
@@ -106,12 +111,24 @@ struct AppendStructuredBuffer
}
};
-/// @category buffer_types
+//@public:
+/**
+Represents an opaque handle to a read-only buffer allocated in global memory that is indexed in bytes.
+ByteAddressBuffer can be used when working with raw buffers. Raw buffer can be viewed as a bag of bits to
+which you want raw access, that is, a buffer that you can conveniently access through chunks of one to
+four 32-bit typeless address values.
+ @remarks
+This type is supported natively when targeting HLSL.
+For all other targets, this type maps to a buffer of 32-bit unsigned integers.
+ @category buffer_types
+*/
__magic_type(HLSLByteAddressBufferType)
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, byteaddressbuffer)]
struct ByteAddressBuffer
{
+ /// Get the number of bytes in the buffer.
+ ///@param[out] dim The number of bytes in the buffer.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer)]
@@ -129,6 +146,20 @@ struct ByteAddressBuffer
}
}
+ /// Load a 32-bit unsigned integer or value with type of `T` from the buffer at the specified location.
+ ///@param T The type of the value to load from the buffer.
+ ///@param location The input address in bytes, which must be a multiple of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return The value loaded from the buffer.
+ ///
+ ///@remarks
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
@@ -153,6 +184,21 @@ struct ByteAddressBuffer
}
}
+ /// Load two 32-bit unsigned integers from the buffer at the specified location
+ /// with additional alignment.
+ ///@param location The input address in bytes.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return Two 32-bit unsigned integers loaded from the buffer.
+ ///
+ ///@remarks
+ /// The overload that takes `status` is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
@@ -181,28 +227,47 @@ struct ByteAddressBuffer
[__readNone]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
- uint2 Load2Aligned(int location)
+ [require(hlsl, byteaddressbuffer)]
+ uint2 Load2(int location, out uint status)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load2";
- default:
- return __byteAddressBufferLoad<uint2>(this, location, __naturalStrideOf<uint2>());
}
}
+ /// Load two 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of stride of `uint2`, which is 8.
+ ///@param location The input address in bytes, which must be a multiple of alignment of 8. Invalid
+ /// value of location will cause undefined behavior.
+ ///@return `uint2` Two 32-bit unsigned integers loaded from the buffer.
[__readNone]
[ForceInline]
- [require(hlsl, byteaddressbuffer)]
- uint2 Load2(int location, out uint status)
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
+ uint2 Load2Aligned(int location)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load2";
+ default:
+ return __byteAddressBufferLoad<uint2>(this, location, __naturalStrideOf<uint2>());
}
}
+ /// Load three 32-bit unsigned integers from the buffer at the specified location.
+ ///@param location The input address in bytes, which must be a multiple of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return `uint3` Three 32-bit unsigned integer values loaded from the buffer.
+ ///
+ ///@remarks
+ /// The overload that takes `status` is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
@@ -231,28 +296,45 @@ struct ByteAddressBuffer
[__readNone]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
- uint3 Load3Aligned(int location)
+ [require(hlsl, byteaddressbuffer)]
+ uint3 Load3(int location, out uint status)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load3";
- default:
- return __byteAddressBufferLoad<uint3>(this, location, __naturalStrideOf<uint3>());
}
}
+ /// Load three 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of stride of `uint3`, which is 12.
+ ///@param location The input address in bytes which must be a multiple of alignment of 12.
+ ///@return `uint3` Three 32-bit unsigned integer values loaded from the buffer.
[__readNone]
[ForceInline]
- [require(hlsl, byteaddressbuffer)]
- uint3 Load3(int location, out uint status)
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
+ uint3 Load3Aligned(int location)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load3";
+ default:
+ return __byteAddressBufferLoad<uint3>(this, location, __naturalStrideOf<uint3>());
}
}
+ /// Load four 32-bit unsigned integers from the buffer at the specified location.
+ ///@param location The input address in bytes which must be a multiple of alignment of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return `uint4` Four 32-bit unsigned integer values loaded from the buffer.
+ ///
+ ///@remarks
+ /// The overload that takes `status` is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
@@ -281,25 +363,29 @@ struct ByteAddressBuffer
[__readNone]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
- uint4 Load4Aligned(int location)
+ [require(hlsl, byteaddressbuffer)]
+ uint4 Load4(int location, out uint status)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load4";
- default:
- return __byteAddressBufferLoad<uint4>(this, location, __naturalStrideOf<uint4>());
}
}
+ /// Load four 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of `uint4`, which is 16.
+ ///@param location The input address in bytes which must be a multiple of alignment of 16.
+ ///@return `uint4` Four 32-bit unsigned integer values loaded from the buffer.
[__readNone]
[ForceInline]
- [require(hlsl, byteaddressbuffer)]
- uint4 Load4(int location, out uint status)
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
+ uint4 Load4Aligned(int location)
{
__target_switch
{
case hlsl: __intrinsic_asm ".Load4";
+ default:
+ return __byteAddressBufferLoad<uint4>(this, location, __naturalStrideOf<uint4>());
}
}
@@ -317,6 +403,11 @@ struct ByteAddressBuffer
return __byteAddressBufferLoad<T>(this, location, alignment);
}
+ /// Load an element with type `T` from the buffer at the specified location with alignment of `T`.
+ ///@param location The input address in bytes which must be a multiple of the size of `T`.
+ ///@return T value with type `T` loaded from the buffer.
+ ///@remarks
+ ///Currently, this function is only supported when `T` is a scalar, vector, or matrix type.
[__readNone]
[ForceInline]
T LoadAligned<T>(int location)
@@ -4011,6 +4102,10 @@ __magic_type(HLSLStructuredBufferType)
__intrinsic_type($(kIROp_HLSLStructuredBufferType))
struct StructuredBuffer
{
+
+ /// Get the dimensions of the buffer.
+ /// @param numStructs The number of structures in the buffer.
+ /// @param stride The stride, in bytes, of each structure element.
[__readNone]
[ForceInline]
void GetDimensions(
@@ -4022,6 +4117,18 @@ struct StructuredBuffer
stride = rs.y;
}
+ /// Load an element from the buffer at the specified location.
+ /// @param TIndex Type of the index.
+ /// @param location The index of the element in the buffer.
+ /// @param[out] status The status of the operation.
+ /// @return The element at the specified index.
+ ///
+ /// @remarks
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
__intrinsic_op($(kIROp_StructuredBufferLoad))
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)]
@@ -4031,6 +4138,10 @@ struct StructuredBuffer
[require(hlsl, structuredbuffer)]
T Load<TIndex : __BuiltinIntegerType>(TIndex location, out uint status);
+ /// Load an element from the buffer at the specified location.
+ /// @param TIndex Type of the index.
+ /// @param index The index of the element in the buffer.
+ /// @return The element at the specified index.
__generic<TIndex : __BuiltinIntegerType>
__subscript(TIndex index) -> T
{
@@ -4066,9 +4177,16 @@ __intrinsic_type($(kIROp_HLSLConsumeStructuredBufferType))
[require(cpp_cuda_glsl_hlsl_spirv, consumestructuredbuffer)]
struct ConsumeStructuredBuffer
{
+ /// Reads the element at the end of the buffer indicated by the associated atomic counter
+ /// and decrements the built-in atomic counter by 1.
+ ///@return The element read from the buffer; it can be a structure.
__intrinsic_op($(kIROp_StructuredBufferConsume))
T Consume();
+ ///Gets the dimensions of the resource.
+ ///@param[out] numStructs The number of structures in the buffer.
+ ///@param[out] stride The stride, in bytes, of each element
+
[ForceInline]
void GetDimensions(
out uint numStructs,
@@ -4143,7 +4261,16 @@ static const struct {
for(auto item : kMutableByteAddressBufferCases) {
}}}}
-/// @category buffer_types
+//@public:
+/**
+Represents an opaque handle to a read-write buffer allocated in global memory that is indexed in bytes.
+This type can be used when working with raw buffers. Raw buffer can be viewed as a bag of bits to
+which you want raw access, that is, a buffer that you can conveniently access through chunks of one to
+four 32-bit typeless address values.
+ @remarks
+This type is supported natively when targeting HLSL.
+ @category buffer_types
+*/
__magic_type(HLSL$(item.name)Type)
__intrinsic_type($(item.op))
struct $(item.name)
@@ -4151,6 +4278,8 @@ struct $(item.name)
// Note(tfoley): supports all operations from `ByteAddressBuffer`
// TODO(tfoley): can this be made a sub-type?
+ /// Get the number of bytes in the buffer.
+ ///@param[out] dim The number of bytes in the buffer.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer_rw)]
void GetDimensions(out uint dim)
@@ -4166,6 +4295,20 @@ struct $(item.name)
}
}
+ /// Load a 32-bit unsigned integer or value with type of `T` from the buffer at the specified location.
+ ///@param T The type of the value to load from the buffer.
+ ///@param location The input address in bytes, which must be a multiple of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return The value loaded from the buffer.
+ ///
+ ///@remarks
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, byteaddressbuffer_rw)]
@@ -4190,6 +4333,21 @@ struct $(item.name)
}
}
+ /// Load two 32-bit unsigned integers from the buffer at the specified location
+ /// with additional alignment.
+ ///@param location The input address in bytes.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return Two 32-bit unsigned integers loaded from the buffer.
+ ///
+ ///@remarks
+ /// This function is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4216,6 +4374,10 @@ struct $(item.name)
}
}
+ /// Load two 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of `uint2`, which is 8.
+ ///@param location The input address in bytes, which must be a multiple of alignment of 8.
+ ///@return `uint2` Two 32-bit unsigned integers loaded from the buffer.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4240,6 +4402,20 @@ struct $(item.name)
}
}
+ /// Load three 32-bit unsigned integers from the buffer at the specified location.
+ ///@param location The input address in bytes, which must be a multiple of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return `uint3` Three 32-bit unsigned integer values loaded from the buffer.
+ ///
+ ///@remarks
+ /// This function is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4266,6 +4442,10 @@ struct $(item.name)
}
}
+ /// Load three 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of `uint3`, which is 12.
+ ///@param location The input address in bytes which must be a multiple of alignment of 12.
+ ///@return `uint3` Three 32-bit unsigned integer values loaded from the buffer.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4290,6 +4470,19 @@ struct $(item.name)
}
}
+ /// Load four 32-bit unsigned integers from the buffer at the specified location.
+ ///@param location The input address in bytes which must be a multiple of alignment of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
+ ///@param[out] status The status of the operation.
+ ///@return `uint4` Four 32-bit unsigned integer values loaded from the buffer.
+ ///
+ ///@remarks
+ /// This function is only supported when targeting HLSL.
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4316,6 +4509,10 @@ struct $(item.name)
}
}
+ /// Load four 32-bit unsigned integers from the buffer at the specified location with alignment
+ /// of `uint4`, which is 16.
+ ///@param location The input address in bytes which must be a multiple of alignment of 16.
+ ///@return `uint4` Four 32-bit unsigned integer values loaded from the buffer.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4356,6 +4553,11 @@ struct $(item.name)
return __byteAddressBufferLoad<T>(this, location, alignment);
}
+ /// Load an element with type `T` from the buffer at the specified location with alignment of `T`.
+ ///@param location The input address in bytes which must be a multiple of size of `T`.
+ ///@return T value with type `T` loaded from the buffer.
+ ///@remarks
+ ///Currently, this function is only supported when `T` is a scalar, vector, or matrix type.
[__NoSideEffect]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -4442,9 +4644,12 @@ ${{{{
}
// FP16x2
- /// @internal
+ ///@internal
/// Maps to the `NvInterlockedAddFp16x2` NVAPI function.
- ///
+ /// Perform 2 16-bit floating point atomic add operations at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic add operation.
+ /// @param fp16x2Value Two 16-bit floating point values are packed into a 32-bit unsigned integer.
+ /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer.
[__requiresNVAPI]
[ForceInline]
[require(cuda_hlsl_spirv)]
@@ -4463,7 +4668,7 @@ ${{{{
/// Perform a 16-bit floating point atomic add operation at `byteAddress`.
/// @param byteAddress The address at which to perform the atomic add operation.
- /// @param valueToAdd The value to add to the value at `byteAddress`.
+ /// @param value The value to add to the value at `byteAddress`.
/// @param originalValue The original value at `byteAddress` before the add operation.
/// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` and requires `SPV_EXT_shader_atomic_float16_add` extension.
///
@@ -4500,7 +4705,7 @@ ${{{{
/// Perform a 16-bit floating point atomic add operation at `byteAddress` through emulation using `half2` atomics.
/// @param byteAddress The address at which to perform the atomic add operation.
- /// @param valueToAdd The value to add to the value at `byteAddress`.
+ /// @param value The value to add to the value at `byteAddress`.
/// @param originalValue The original value at `byteAddress` before the add operation.
/// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half2` vector with the correct part set to `value`
/// and the remaining part set to 0. This requires the `AtomicFloat16VectorNV` capability introduced by the `SPV_NV_shader_atomic_fp16_vector`
@@ -4594,7 +4799,7 @@ ${{{{
/// @param byteAddress The address at which to perform the atomic compare-and-exchange operation.
/// @param compareValue The value to compare to the value at `byteAddress`.
/// @param value The value to store at `byteAddress` if the comparison is successful.
- /// @param originalValue The original value at `byteAddress` before the add operation.
+ /// @param outOriginalValue The original value at `byteAddress` before the compare-and-exchange operation.
/// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
/// translates to `InterlockedCompareExchange64` and requires shader model 6.6.
/// For CUDA, this function maps to `atomicCAS`.
@@ -4618,6 +4823,10 @@ ${{{{
${{{{
for (auto op : bufferAtomicOps) {
}}}}
+
+ /// Perform a 64-bit unsigned integer atomic $(op.internalName) operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic $(op.internalName) operation.
+ /// @param value The operand for the $(op.internalName) operation.
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
uint64_t Interlocked$(op.name)U64(uint byteAddress, uint64_t value)
@@ -4638,7 +4847,7 @@ ${{{{
/// Perform a 64-bit integer atomic $(op.internalName) operation at `byteAddress`.
/// @param byteAddress The address at which to perform the atomic $(op.internalName) operation.
/// @param value The operand for the $(op.internalName) operation.
- /// @param originalValue The original value at `byteAddress` before the $(op.internalName) operation.
+ /// @param outOriginalValue The original value at `byteAddress` before the $(op.internalName) operation.
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void Interlocked$(op.name)64<T:__BuiltinInt64Type>(uint byteAddress, T value, out T outOriginalValue)
@@ -4679,6 +4888,14 @@ ${{{{
}
}
+ /// Perform a floating-point atomic bitwise compare-and-exchange operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic exchange operation.
+ /// @param compareValue The value to compare to the value at `byteAddress`.
+ /// @param value The value to store at `byteAddress`.
+ /// @param[out] outOriginalValue The original value at `byteAddress` before the exchange operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareExchangeFloatBitwise` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue)
@@ -4858,7 +5075,11 @@ ${{{{
}
}
-
+ /// Set one value to the buffer at the specified location.
+ ///@param T The type of the value to store to the buffer.
+ ///@param value The input value.
+ ///@param address The input address in bytes, which must be a multiple of 4.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store(uint address, uint value)
@@ -4872,6 +5093,10 @@ ${{{{
}
+ /// Set two values to the buffer at the specified location.
+ ///@param address The input address in bytes, which must be a multiple of 4.
+ ///@param value Two input values.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store2(uint address, uint2 value)
@@ -4897,6 +5122,10 @@ ${{{{
}
}
+ /// Set two values to the buffer at the specified location, the address will be aligned
+ /// to the alignment of `uint2`, which is 8.
+ ///@param address The input address in bytes, which must be a multiple of 8.
+ ///@param value Two input values.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store2Aligned(uint address, uint2 value)
@@ -4909,6 +5138,10 @@ ${{{{
}
}
+ /// Set three values to the buffer at the specified location.
+ ///@param address The input address in bytes, which must be a multiple of 4.
+ ///@param value Three input values.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store3(uint address, uint3 value)
@@ -4921,7 +5154,6 @@ ${{{{
}
}
-
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store3(uint address, uint3 value, uint alignment)
@@ -4934,6 +5166,10 @@ ${{{{
}
}
+ /// Set three values to the buffer at the specified location, the address will be aligned
+ /// to the alignment of `uint3`, which is 12.
+ ///@param address The input address in bytes, which must be a multiple of 12.
+ ///@param value Three input values.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
void Store3Aligned(uint address, uint3 value)
@@ -4946,6 +5182,10 @@ ${{{{
}
}
+ /// Set four values to the buffer at the specified location.
+ ///@param address The input address in bytes, which must be a multiple of 4.
+ ///@param value Four input values.
+ ///@param alignment Specifies the alignment of the location, which must be a multiple of 4.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
void Store4(uint address, uint4 value)
@@ -4971,6 +5211,10 @@ ${{{{
}
}
+ /// Set four values to the buffer at the specified location, the address will be aligned
+ /// to the alignment of `uint4`, which is 16.
+ ///@param address The input address in bytes, which must be a multiple of 16.
+ ///@param value Four input values.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4Aligned(uint address, uint4 value)
@@ -4984,21 +5228,26 @@ ${{{{
}
[ForceInline]
- void Store<T>(int offset, T value)
+ void Store<T>(uint address, T value)
{
- __byteAddressBufferStore(this, offset, 0, value);
+ __byteAddressBufferStore(this, address, 0, value);
}
[ForceInline]
- void Store<T>(int offset, T value, uint alignment)
+ void Store<T>(uint address, T value, uint alignment)
{
- __byteAddressBufferStore(this, offset, alignment, value);
+ __byteAddressBufferStore(this, address, alignment, value);
}
+ /// Set a value of type `T` to the buffer at the specified location, the address will be aligned
+ /// to the alignment of `T`.
+ ///@param T The type of the input value.
+ ///@param address The input address in bytes, which must be a multiple of size of `T`.
+ ///@param value The input value.
[ForceInline]
- void StoreAligned<T>(int offset, T value)
+ void StoreAligned<T>(uint address, T value)
{
- __byteAddressBufferStore(this, offset, __naturalStrideOf<T>(), value);
+ __byteAddressBufferStore(this, address, __naturalStrideOf<T>(), value);
}
};
@@ -5038,8 +5287,15 @@ When generating code for other targets, this parameter is ignored and has no eff
**/
struct $(item.name)
{
+ /// Decrements the object's hidden counter.
+ /// @return The post-decremented counter value.
+ /// @remarks
+ /// This function is not implemented when targeting non-HLSL.
uint DecrementCounter();
+ /// Get the dimensions of the buffer.
+ /// @param numStructs The number of structures in the buffer.
+ /// @param stride The stride, in bytes, of each structure element.
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)]
@@ -5057,8 +5313,25 @@ struct $(item.name)
}
}
+ /// Increment the object's hidden counter.
+ /// @return The pre-incremented counter value.
+ /// @remarks
+ /// This function is not implemented when targeting non-HLSL.
uint IncrementCounter();
+ /// Load an element from the buffer at the specified location.
+ /// @param TIndex Type of the index.
+ /// @param location The index of the element in the buffer.
+ /// @param[out] status The status of the operation.
+ /// @return The element at the specified index.
+ ///
+ /// @remarks
+ /// You can't access the output parameter `status` directly; instead,
+ /// pass the status to the `CheckAccessFullyMapped` intrinsic function.
+ /// `CheckAccessFullyMapped` returns TRUE if all values from the corresponding Sample,
+ /// Gather, or Load operation accessed mapped tiles in a tiled resource.
+ /// If any values were taken from an unmapped tile, `CheckAccessFullyMapped` returns FALSE.
+ /// When targeting non-HLSL, the status is always 0.
[__NoSideEffect]
__intrinsic_op($(kIROp_RWStructuredBufferLoad))
T Load<TIndex : __BuiltinIntegerType>(TIndex location);
@@ -5067,6 +5340,10 @@ struct $(item.name)
__intrinsic_op($(kIROp_RWStructuredBufferLoadStatus))
T Load<TIndex : __BuiltinIntegerType>(TIndex location, out uint status);
+ /// Load an element from the buffer at the specified location.
+ /// @param TIndex Type of the index.
+ /// @param index The index of the element in the buffer.
+ /// @return The element at the specified index.
__generic<TIndex : __BuiltinIntegerType>
__subscript(TIndex index) -> T
{