From 4db6bd3cd6da1871fdac520c280bd9f933e48489 Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Wed, 8 Jun 2022 19:51:49 -0400
Subject: Improved bounds checking for C++/CUDA (#2263)

* #include of an absolute path didn't work, because paths were always taken to be relative.

* Use TerminatedUnownedStringSlice for literals in output C++.

* Remove Escape/Unescape functions used in slang-token-reader.cpp
Add target type of 'host-cpp' etc to map to the target types.

* Fix some corner cases around string encoding.

* Added unit test for string escaping.
Fixed some assorted escaping bugs.

* Updated test output.

* Added decode test.

* Stop using hex output, to get around 'greedy' aspect. Use octal instead.

* Added HostHostCallable
Small changes to use ArtifactDesc/Info instead of large switches.

* Fix C++ emit to handle arbitrary function export.

* Add options handling for callable without an output being specified.

* Can compile with COM interface. Added example using com interface.

* Use the IR Ptr type instead of hack in C++ emit for interfaces.

* Fix issue with outputting the COM call when ptr is used.

* Fix crash issue on compilation failure.

* Add support for __global.

* Added `ActualGlobalRate`
Added special handling around globals and COM interfaces.
Tested out in cpu-com-example.

* Fix typo in NodeBase.

* Support for accessing globals by name working.

* Bounds checking for C++
Improved bounds checks for CUDA.

* Check that actual global initialization is working.

* Fix typo.

* Refactor the com replacement such that it doesn't need a cache or do anything special with GlobalVar.

* Fix typo in CUDA prelude.

* Remove context.
Only create replacement if needed.

* Split out COM host-callable into a unit-test.

* host-callable com testing on C++ and llvm.

* Comment around the COM ptr replacement.

* WIP Zero bound test.

* Disable com test on vs 32 bit.
Fix C++ prelude

* Disable 32 bit targets testing com host-callable.

* For now disable zero index test.

* Enable bounds checking for CPU/CUDA.

* Small fixes.
Disable CUDA zero index bound fix.

* Add test result for bound check.

* Work around for index wrapping issue.

* Added Fixed array test.

* Only enable prelude asserts via SLANG_PRELUDE_ENABLE_ASSERT (unless defined by the user)
---
 tests/compute/bound-check-zero-index.slang         | 56 ++++++++++++++++++++++
 .../bound-check-zero-index.slang.expected.txt      |  8 ++++
 2 files changed, 64 insertions(+)
 create mode 100644 tests/compute/bound-check-zero-index.slang
 create mode 100644 tests/compute/bound-check-zero-index.slang.expected.txt

(limited to 'tests/compute')
diff --git a/tests/compute/bound-check-zero-index.slang b/tests/compute/bound-check-zero-index.slang
new file mode 100644
index 000000000..e8244886e
--- /dev/null
+++ b/tests/compute/bound-check-zero-index.slang
@@ -0,0 +1,56 @@
+// bound-check-zero-index.slang
+//
+// Check the 'zero indexing' bound check feature (enabled here via -DSLANG_ENABLE_BOUND_ZERO_INDEX), supported by CPU and CUDA.
+// The expected output corresponds to every out-of-range access being treated as an access to index 0.
+// Currently zero index bound checking doesn't appear to be working properly for CUDA, so only the CPU test is enabled.
+//DISABLE_TEST(compute):COMPARE_COMPUTE:-cuda -shaderobj -Xslang... -DSLANG_ENABLE_BOUND_ZERO_INDEX -X.
+//TEST(compute):COMPARE_COMPUTE:-cpu -shaderobj -Xslang... -DSLANG_ENABLE_BOUND_ZERO_INDEX -X.
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4]):name=byteAddressBuffer
+ByteAddressBuffer byteAddressBuffer;            // Read in computeMain via Load<int> at byte offsets tid * 4 and -tid * 4.
+
+//TEST_INPUT:ubuffer(data=[0x10 0x20 0x30 0x40]):name=rwByteAddressBuffer
+RWByteAddressBuffer rwByteAddressBuffer;        // Only loaded from (never stored to) in computeMain, same offsets as above.
+
+//TEST_INPUT:ubuffer(data=[0x100 0x200 0x300 0x400], stride=4):name=structuredBuffer
+StructuredBuffer<int> structuredBuffer;         // Indexed in computeMain with tid and -tid.
+
+//TEST_INPUT:ubuffer(data=[0x1000 0x2000 0x3000 0x4000], stride=4):name=rwStructuredBuffer
+RWStructuredBuffer<int> rwStructuredBuffer;     // Only read in computeMain, indexed with tid and -tid.
+
+//TEST_INPUT:ubuffer(data=[-1 -1 -1 -1], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;           // Receives each thread's total at index tid.
+
+//TEST_INPUT:ubuffer(data=[-1 -1 -1 -1], stride=4):out,name=outputBuffer2
+RWStructuredBuffer<int> outputBuffer2;          // Receives each thread's total at index tid + 1 (out of range for the last thread).
+// Each thread sums reads at an index derived from tid and from -tid for every input buffer and a local array, then stores the sum.
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+	int tid = dispatchThreadID.x;
+    // Fixed-size local array, indexed below with the same tid / -tid pattern as the buffers.
+    int fixedArray[3] = { 2, 5, 9};
+    // For tid > 0 the negated index/offset is negative; the expected output matches such accesses reading element 0.
+    int total = 0;
+    total += byteAddressBuffer.Load<int>(tid * 4);
+    total += byteAddressBuffer.Load<int>(-tid * 4);
+    
+    total += rwByteAddressBuffer.Load<int>(tid * 4);
+    total += rwByteAddressBuffer.Load<int>(-tid * 4);
+    
+    total += structuredBuffer[tid];
+    total += structuredBuffer[-tid];
+    
+    total += rwStructuredBuffer[tid];
+    total += rwStructuredBuffer[-tid];
+    // fixedArray has only 3 elements, so tid == 3 is out of range here in addition to -tid for tid > 0.
+    total += fixedArray[tid];
+    total += fixedArray[-tid];
+    
+    outputBuffer[tid] = total;
+    
+    // NOTE! Different threads could access this if being performed in parallel.
+    // So it would be nondeterministic if several threads wrote to the same index (because out of range) when running in parallel.
+    // By adding one to tid, only the last thread's index is out of range, so all indices should be hit exactly once.
+    outputBuffer2[tid + 1] = total;
+}
\ No newline at end of file
diff --git a/tests/compute/bound-check-zero-index.slang.expected.txt b/tests/compute/bound-check-zero-index.slang.expected.txt
new file mode 100644
index 000000000..21f89147e
--- /dev/null
+++ b/tests/compute/bound-check-zero-index.slang.expected.txt
@@ -0,0 +1,8 @@
+2226
+333A
+444F
+5559
+5559
+2226
+333A
+444F
-- 
cgit v1.2.3