From 4db6bd3cd6da1871fdac520c280bd9f933e48489 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 8 Jun 2022 19:51:49 -0400 Subject: Improved bounds checking for C++/CUDA (#2263) * #include an absolute path didn't work - because paths were taken to always be relative. * Use TerminatedUnownedStringSlice for literals in output C++. * Remove Escape/Unescape functions used in slang-token-reader.cpp Add target type of 'host-cpp' etc to map to the target types. * Fix some corner cases around string encoding. * Added unit test for string escaping. Fixed some assorted escaping bugs. * Updated test output. * Added decode test. * Stop using hex output, to get around 'greedy' aspect. Use octal instead. * Added HostHostCallable Small changes to use ArtifactDesc/Info instead of large switches. * Fix C++ emit to handle arbitrary function export. * Add options handling for callable without an output being specified. * Can compile with COM interface. Added example using com interface. * Use the IR Ptr type instead of hack in C++ emit for interfaces. * Fix issue with outputting the COM call when ptr is used. * Fix crash issue on compilation failure. * Add support for __global. * Added `ActualGlobalRate` Added special handling around globals and COM interfaces. Tested out in cpu-com-example. * Fix typo in NodeBase. * Support for accessing globals by name working. * Bounds checking for C++ Improved bounds checks for CUDA. * Check that actual global initialization is working. * Fix typo. * Refactor the com replacement such that it doesn't need a cache or do anything special with GlobalVar. * Fix typo in CUDA prelude. * Remove context. Only create replacement if needed. * Split out COM host-callable into a unit-test. * host-callable com testing on C++and llvm. * Comment around the COM ptr replacement. * WIP Zero bound test. * Disable com test on vs 32 bit. Fix C++ prelude * Disable 32 bit targets testing com host-callable. * For now disable zero index test. 
* Enable bounds checking for CPU/CUDA. * Small fixes. Disable CUDA zero index bound fix. * Add test result for bound check. * Work around for index wrapping issue. * Added Fixed array test. * Only enable prelude asserts via SLANG_PRELUDE_ENABLE_ASSERT (unless defined by the user) --- tests/compute/bound-check-zero-index.slang | 56 ++++++++++++++++++++++ .../bound-check-zero-index.slang.expected.txt | 8 ++++ 2 files changed, 64 insertions(+) create mode 100644 tests/compute/bound-check-zero-index.slang create mode 100644 tests/compute/bound-check-zero-index.slang.expected.txt (limited to 'tests/compute') diff --git a/tests/compute/bound-check-zero-index.slang b/tests/compute/bound-check-zero-index.slang new file mode 100644 index 000000000..e8244886e --- /dev/null +++ b/tests/compute/bound-check-zero-index.slang @@ -0,0 +1,56 @@ +// bound-check-zero-index.slang + +// Check 'zero indexing' bound check feature, supported by CPU and CUDA + +// Currently zero index bound checking doesn't appear to be working properly for CUDA. +//TEST(compute):COMPARE_COMPUTE:-cuda -shaderobj -Xslang... -DSLANG_ENABLE_BOUND_ZERO_INDEX -X. +//TEST(compute):COMPARE_COMPUTE:-cpu -shaderobj -Xslang... -DSLANG_ENABLE_BOUND_ZERO_INDEX -X. 
+ +//TEST_INPUT:ubuffer(data=[1 2 3 4]):name=byteAddressBuffer +ByteAddressBuffer byteAddressBuffer; + +//TEST_INPUT:ubuffer(data=[0x10 0x20 0x30 0x40]):name=rwByteAddressBuffer +RWByteAddressBuffer rwByteAddressBuffer; + +//TEST_INPUT:ubuffer(data=[0x100 0x200 0x300 0x400], stride=4):name=structuredBuffer +StructuredBuffer<int> structuredBuffer; + +//TEST_INPUT:ubuffer(data=[0x1000 0x2000 0x3000 0x4000], stride=4):name=rwStructuredBuffer +RWStructuredBuffer<int> rwStructuredBuffer; + +//TEST_INPUT:ubuffer(data=[-1 -1 -1 -1], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +//TEST_INPUT:ubuffer(data=[-1 -1 -1 -1], stride=4):out,name=outputBuffer2 +RWStructuredBuffer<int> outputBuffer2; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int tid = dispatchThreadID.x; + + int fixedArray[3] = { 2, 5, 9}; + + int total = 0; + total += byteAddressBuffer.Load(tid * 4); + total += byteAddressBuffer.Load(-tid * 4); + + total += rwByteAddressBuffer.Load(tid * 4); + total += rwByteAddressBuffer.Load(-tid * 4); + + total += structuredBuffer[tid]; + total += structuredBuffer[-tid]; + + total += rwStructuredBuffer[tid]; + total += rwStructuredBuffer[-tid]; + + total += fixedArray[tid]; + total += fixedArray[-tid]; + + outputBuffer[tid] = total; + + // NOTE! The threads may run in parallel, so several of them could write to this buffer at the same time. + // The result would be nondeterministic if out-of-range writes from different threads landed on the same index. + // Writing to tid + 1 sends only the last thread out of range, so every index should be hit exactly once. + outputBuffer2[tid + 1] = total; +} \ No newline at end of file diff --git a/tests/compute/bound-check-zero-index.slang.expected.txt b/tests/compute/bound-check-zero-index.slang.expected.txt new file mode 100644 index 000000000..21f89147e --- /dev/null +++ b/tests/compute/bound-check-zero-index.slang.expected.txt @@ -0,0 +1,8 @@ +2226 +333A +444F +5559 +5559 +2226 +333A +444F -- cgit v1.2.3