6 files changed, 57 insertions, 10 deletions
diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md
index ff63a65a2..0b29a6152 100644
--- a/docs/target-compatibility.md
+++ b/docs/target-compatibility.md
@@ -37,7 +37,8 @@ Items with ^ means there is some discussion about support later in the document
 | Full bool                   |     Yes      |   Yes        |   Yes      |     No        |    Yes ^ 
 | Mesh Shader                 |     No       |   No +       |   No +     |     No        |    No
 | `[unroll]`                  |     Yes      |   Yes        |   Yes ^    |     Yes       |    Limited + 
-
+| Atomics                     |     Yes      |   Yes        |   Yes      |     Yes       |    No + 
+| Atomics on RWBuffer         |     Yes      |   Yes        |   Yes ^    |     No ^      |    No + 
 
 ## Half Type
 
@@ -161,3 +162,10 @@ On GLSL and VK targets loop unrolling uses the [GL_EXT_control_flow_attributes](
 
 Slang does have a cross target mechanism to [unroll loops](language-reference/06-statements.md), in the section `Compile-Time For Statement`.
 
+## Atomics on RWBuffer
+
+For VK, the GLSL output from Slang seems plausible, but VK binding fails in the test harness.
+
+On CUDA, `RWBuffer` becomes a `CUsurfObject`, which is a 'texture' type and does not support atomics.
+
+On the CPU, atomics are not currently supported, but will be in the future.
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 415d7f941..d57ac0e09 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -116,6 +116,29 @@ struct StructuredBuffer
     size_t count;
 };
 
+
+// Typed read-write buffer: bounds-asserted access to `count` elements of T starting at `data`.
+template <typename T>
+struct RWBuffer
+{
+    SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_PRELUDE_ASSERT(index < count); return data[index]; }
+    const T& Load(size_t index) const { SLANG_PRELUDE_ASSERT(index < count); return data[index]; }
+    void GetDimensions(uint32_t& outCount) const { outCount = uint32_t(count); } // const: only reads count (narrowed to 32 bits)
+    T* data;        // first element
+    size_t count;   // number of elements
+};
+
+// Typed read-only buffer: operator[] and Load hand out const references to `count` elements of T at `data`.
+template <typename T>
+struct Buffer
+{
+    SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_PRELUDE_ASSERT(index < count); return data[index]; }
+    const T& Load(size_t index) const { SLANG_PRELUDE_ASSERT(index < count); return data[index]; }
+    void GetDimensions(uint32_t& outCount) const { outCount = uint32_t(count); } // const: only reads count (narrowed to 32 bits)
+    T* data;        // first element
+    size_t count;   // number of elements
+};
+
 // Missing  Load(_In_  int  Location, _Out_ uint Status);
 struct ByteAddressBuffer
 {
diff --git a/tests/compute/atomics-buffer.slang b/tests/compute/atomics-buffer.slang
index 1739d4bbc..cf92f1b21 100644
--- a/tests/compute/atomics-buffer.slang
+++ b/tests/compute/atomics-buffer.slang
@@ -2,6 +2,13 @@
 
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
+// Doesn't work on VK - GLSL output doesn't replace InterlockedAdd.  
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk
+// Cannot work on CUDA, as outputBuffer becomes a CUsurfObject, which does not appear to have atomics available.
+// If the buffer were a StructuredBuffer this would work on CUDA.
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda
+// Atomics not available on CPU currently
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cpu
 
 //TEST_INPUT:ubuffer(format=R_UInt32, data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):out,name outputBuffer
 
diff --git a/tests/compute/atomics-groupshared.slang b/tests/compute/atomics-groupshared.slang
index 9e237bee1..9c3e1ab8d 100644
--- a/tests/compute/atomics-groupshared.slang
+++ b/tests/compute/atomics-groupshared.slang
@@ -2,6 +2,7 @@
 
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
@@ -18,13 +19,20 @@ uint test(uint val)
 
     GroupMemoryBarrierWithGroupSync();
 
-	InterlockedAdd(shared[val], 		val, 		originalValue);
-	InterlockedAdd(shared[val ^ 1], 	val*16, 	originalValue);
-	InterlockedAdd(shared[val ^ 2], 	val*16*16, 	originalValue);
-	
+    uint originalSum = 0;
+
+    InterlockedAdd(shared[val],         val,         originalValue);
+    originalSum += originalValue;
+    
+    InterlockedAdd(shared[val ^ 1],     val*16,     originalValue);
+    originalSum += originalValue;
+    
+    InterlockedAdd(shared[val ^ 2],     val*16*16,     originalValue);
+    originalSum += originalValue;
+    
     GroupMemoryBarrierWithGroupSync();
 
-    return shared[val];
+    return shared[val] ^ originalSum;
 }
 
 [numthreads(4, 1, 1)]
diff --git a/tests/compute/atomics-groupshared.slang.expected.txt b/tests/compute/atomics-groupshared.slang.expected.txt
index 30966f0df..11ea781c9 100644
--- a/tests/compute/atomics-groupshared.slang.expected.txt
+++ b/tests/compute/atomics-groupshared.slang.expected.txt
@@ -1,4 +1,4 @@
-210
-301
- 32
-123
+223
+322
+21
+120
diff --git a/tests/compute/atomics.slang b/tests/compute/atomics.slang
index ddb5523e9..cbd35d1dc 100644
--- a/tests/compute/atomics.slang
+++ b/tests/compute/atomics.slang
@@ -2,6 +2,7 @@
 
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -vk
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -cuda
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out, name outputBuffer