summaryrefslogtreecommitdiff
path: root/tests/hlsl/dxsdk/FluidCS11
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2018-12-07 13:31:06 -0800
committerGitHub <noreply@github.com>2018-12-07 13:31:06 -0800
commit135eaff6b892fc91a398714ddcf7ef377cd4cccb (patch)
treee69f30a4fadfdb834ea141c1ec9efc862ccc70d3 /tests/hlsl/dxsdk/FluidCS11
parentb0c2423f00b910f2f4d5010e6a04114112e294fd (diff)
Change how buffers are emitted (#741)
* Change how buffers are emitted This is a change with a lot of pieces, which can't always be separated out cleanly. I'm going to walk through them in what I hope is a logical order. The main goal of this change was to allow arrays of structured buffers to translate to Vulkan. Consider two declarations of structured buffers in HLSL/Slang: ```hlsl StructuredBuffer<X> single; StructuredBuffer<Y> multiple[10]; ``` The current translation logic was handling `single` by translating it into an *unnamed* GLSL `buffer` block like: ```glsl layout(std430) buffer _S1 { X single[]; }; ``` That syntax allows an expression like `single[i]` in Slang to be translated simply as `single[i]` in GLSL. But that naive translation doesn't work for `multiple`, since we need to declare an array of blocks in GLSL, which requires giving the whole thing a name: ```glsl layout(std430) buffer _S2 { Y _data[]; } multiple[10]; ``` Now a reference to `multiple[i][j]` in Slang needs to become `multiple[i]._data[j]` in GLSL. To avoid having way too many special cases around single structured buffers vs. arrays, it makes sense to always emit things in the latter form, so that we instead lower `single` as: ```glsl layout(std430) buffer _S1 { X _data[]; } single; ``` So that now a reference to `single[i]` becomes `single._data[i]` in GLSL. Most of that can be handled in the standard library translation of the structured buffer indexing operations. The only wrinkle there is that there were some *old* special-case instructions in the IR intended to handle buffer load/store operations (these were added back when I was trying to keep the "VM" path working). These aren't really needed to have structured-buffer operations work; they can be handled as ordinary functions as far as the stdlib is concerned. I removed the old instructions. Along the way, it became clear that a few other cases follow the same pattern. Byte-addressed buffers are an obvious case. 
We were lowering HLSL/Slang: ```hlsl ByteAddressBuffer b; ... uint x = b.Load(0); ``` to GLSL like: ```glsl layout(std430) buffer _S1 { uint b[]; }; ... uint x = b[0]; ``` That logic would fail for arrays the same way that the structured buffer case was failing. The fix is the same: use named `buffer` blocks and then introduce an explicit `_data` field: ```glsl layout(std430) buffer _S1 { uint _data[]; } b; ... uint x = b._data[0]; ``` Just like with structured buffers, all of the VK translation for operations on byte-addressed buffers can be implemented directly in the stdlib, so once the emit logic was changed it was just a matter of adding `._data` to a bunch of VK translations. It turns out that arrays of constant buffers have more or less the same problem, and furthermore we have some problems with any code that directly uses the modern HLSL `ConstantBuffer<T>` type. Note: the emit logic around constant buffers sometimes refers to "parameter groups" because that is being used in the compiler as a catch-all term for constant buffers, texture buffers, and parameter blocks. The existing code was going out of its way to reproduce the way that constant buffer declarations are implicitly referenced in HLSL: ```hlsl cbuffer C { float f; } ... float tmp = f; // No reference to `C` here ``` This can be seen in the emit logic with the `isDerefBaseImplicit` function, which is used to take the internal IR representation for a reference to `f` (which is closer to the expression `(*C).f` or `C->f`) and leave off any reference to `C` so that we emit just `f`. That kind of logic just flat out doesn't work in some important cases. Arrays of constant buffers are a clear one: ```hlsl ConstantBuffer<X> cbArray[3]; ... X x = cbArray[0]; ``` There is no way to translate that to an ordinary `cbuffer` declaration at all. The same problem can be created without arrays, though: ```hlsl ConstantBuffer<X> singleCB; ... 
X x = singleCB; ``` The current strategy for translating constant buffers was translating `singleCB` into a `cbuffer` declaration that reproduced the fields of `X` as its members, which just wouldn't work: ```hlsl cbuffer singleCB { float f; // field of `X` } ... X x = singleCB; // ERROR: there is nothing named `singleCB` in this HLSL ``` The new strategy is more consistent. We still generate a `cbuffer` declaration for a single constant buffer, but we always give it a single field of the chosen element type: ```hlsl cbuffer singleCB { X singleCB; } ... X x = singleCB; // this works fine! ``` And in the array case we generate code that uses the explicit `ConstantBuffer<T>` type: ```hlsl ConstantBuffer<X> cbArray[3]; ... X x = cbArray[0]; ``` The GLSL output is more complicated because unlike with HLSL there is no implicit conversion from a uniform block to its element type (there is no notion of an element type). The array case thus needs a `_data` field similar to what we do for structured buffers: ```glsl layout(std140) uniform _S3 { X _data; } cbArray[3]; ... X x = cbArray[0]._data; ``` And then the non-array case needs to have a similar `_data` field for consistency: ```glsl layout(std140) uniform _S1 { X _data; } singleCB; ... X x = singleCB._data; ``` This is handled by inserting the necessary reference to `_data` whenever we dereference a constant buffer, either as part of a load instruction (loading from the whole CB as a pointer), or an `IRFieldAddress` instruction which forms a pointer into the CB (e.g., `&(singleCB->f)` becomes `singleCB._data.f`). The current emit logic handles `ParameterBlock<X>` differently from `ConstantBuffer<X>`, but really only to allow parameter blocks to be explicitly named in the output, while constant buffers were left implicit by default. 
Thus the only difference was a legacy one (from back when trying to exactly reproduce the HLSL text we got as input was considered an important goal), and the new approach to emitting constant buffers would get rid of it. I removed the separate logic for emitting `ParameterBlock<X>` and just let the handling for constant buffers deal with it. Note that any resource types inside of a `ParameterBlock<X>` would have been moved out as part of legalization, so that a parameter block is 100% equivalent to a constant buffer when it comes time to emit code. Unsurprisingly, changing the way we generate HLSL and GLSL output for all these buffer types meant that any tests that were directly comparing the output of `slangc` against `fxc`, `dxc`, or `glslang` broke. The basic approach to fixing the breakage in GLSL tests was to update the GLSL baseline to reflect the new output strategy. In some cases I used macros to name the various `_S<digits>` temporaries so that future renaming will hopefully be easier (it would be great if we auto-generated temporary names with a bit more context). There was one GLSL test (`tests/bugs/vk-structured-buffer-binding`) that was using raw GLSL expected output, and this was changed to use a GLSL baseline to generate SPIR-V for comparison. For HLSL tests we were sometimes running the same input file through `slangc` and `fxc`/`dxc`, and in these cases I macro-ized the various `cbuffer` declarations to generate different declarations depending on the compiler. I completely dropped the tests coming from the D3D SDK because they aren't providing much coverage, and updating them would change them so far from the original code that the purported benefit (using a body of existing shaders) would be lost. 
I also dropped the explicit matrix layout qualifiers in the `matrix-layout` test because the new output strategy breaks those for GLSL (you can't put matrix layout qualifiers on `struct` fields, and now the body of every constant buffer is inside a `struct`). This isn't as big of a loss as it seems, because our handling of those qualifiers wasn't really right to begin with. Slang users should only be setting the matrix layout mode globally (and we should probably switch to error out on the explicit qualifiers for now). The other thing that got dropped is tests involving `packoffset` modifiers. Slang already warns that it doesn't support these, and the way they were used in the test cases is actually misleading. For the binding/layout-related tests, the goal was to show that Slang reproduces the same layout as fxc, in which case explicitly enforcing a layout via `packoffset` seems like cheating (are we sure we enforced the layout fxc would have produced?). The real reason was that Slang used to emit explicit `packoffset` on *every* field of a `cbuffer` it would output, because of an `fxc` bug where you couldn't use `register` on textures/samplers declared inside a `cbuffer` unless *every* field in the `cbuffer` used a `register` or `packoffset` modifier. Slang hasn't required that behavior in a while because it now splits textures and samplers, and the one test case where we needed `packoffset` to work around the `fxc` bug in the baseline HLSL has been macro-ified even more to work around the bug. The amount of churn in the test cases is unfortunate, but it continues to point at the weakness of any testing strategy that checks for exact equivalence between Slang's output and that of other compilers. We need to keep working to replace these tests with better alternatives. 
In `check.cpp` there is logic to perform implicit dereferencing, so that if you write `obj.f` where `obj` is a `ConstantBuffer<X>` (or some other "pointer-like" type) and `f` is a field in `X`, then this effectively translates as `(*obj).f`. That is, we dereference the value of type `ConstantBuffer<X>` to get a value of type `X`, and then refer to the field of the `X` value. There was a problem where the logic to insert that kind of implicit dereference operation was using a reference (`auto& type = ...`) for the type of the expression being dereferenced, and then clobbering it. This would mean that an expression of type `ConstantBuffer<X>` would have its type overwritten to be just `X` and then codegen would break later on. I'm not sure how we haven't run into that before. The `array-of-buffers` test case was added to confirm that we now support arrays of constant, structured, and byte-address buffers for both DXIL and SPIR-V output. Okay, so that was a lot of stuff, but hopefully it is clear how this all works to make the output of the compiler more consistent and explicit, while also supporting the required new functionality. * fixup: review feedback
Diffstat (limited to 'tests/hlsl/dxsdk/FluidCS11')
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl75
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl529
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl124
3 files changed, 0 insertions, 728 deletions
diff --git a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
deleted file mode 100644
index 6e14bc10e..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
-//--------------------------------------------------------------------------------------
-// File: ComputeShaderSort11.hlsl
-//
-// This file contains the compute shaders to perform GPU sorting using DirectX 11.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define BITONIC_BLOCK_SIZE 512
-
-#define TRANSPOSE_BLOCK_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer CB : register( b0 )
-{
- unsigned int g_iLevel;
- unsigned int g_iLevelMask;
- unsigned int g_iWidth;
- unsigned int g_iHeight;
-};
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-StructuredBuffer<unsigned int> Input : register( t0 );
-RWStructuredBuffer<unsigned int> Data : register( u0 );
-
-//--------------------------------------------------------------------------------------
-// Bitonic Sort Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];
-
-[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
-void BitonicSort( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- // Load shared data
- shared_data[GI] = Data[DTid.x];
- GroupMemoryBarrierWithGroupSync();
-
- // Sort the shared data
- for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1)
- {
- unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI];
- GroupMemoryBarrierWithGroupSync();
- shared_data[GI] = result;
- GroupMemoryBarrierWithGroupSync();
- }
-
- // Store shared data
- Data[DTid.x] = shared_data[GI];
-}
-
-//--------------------------------------------------------------------------------------
-// Matrix Transpose Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];
-
-[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
-void MatrixTranspose( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x];
- GroupMemoryBarrierWithGroupSync();
- uint2 XY = DTid.yx - GTid.yx + GTid.xy;
- Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y];
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
deleted file mode 100644
index 8966ea3c1..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
+++ /dev/null
@@ -1,529 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BuildGridCS -entry ClearGridIndicesCS -entry BuildGridIndicesCS -entry RearrangeParticlesCS -entry DensityCS_Simple -entry DensityCS_Shared -entry DensityCS_Grid -entry ForceCS_Simple -entry ForceCS_Shared -entry ForceCS_Grid -entry IntegrateCS
-//--------------------------------------------------------------------------------------
-// File: FluidCS11.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Smoothed Particle Hydrodynamics Algorithm Based Upon:
-// Particle-Based Fluid Simulation for Interactive Applications
-// Matthias Müller
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid Algorithm Based Upon:
-// Broad-Phase Collision Detection with CUDA
-// Scott Le Grand
-//--------------------------------------------------------------------------------------
-
-struct Particle
-{
- float2 position;
- float2 velocity;
-};
-
-struct ParticleForces
-{
- float2 acceleration;
-};
-
-struct ParticleDensity
-{
- float density;
-};
-
-cbuffer cbSimulationConstants : register( b0 )
-{
- uint g_iNumParticles;
- float g_fTimeStep;
- float g_fSmoothlen;
- float g_fPressureStiffness;
- float g_fRestDensity;
- float g_fDensityCoef;
- float g_fGradPressureCoef;
- float g_fLapViscosityCoef;
- float g_fWallStiffness;
-
- float4 g_vGravity;
- float4 g_vGridDim;
- float3 g_vPlanes[4];
-};
-
-//--------------------------------------------------------------------------------------
-// Fluid Simulation
-//--------------------------------------------------------------------------------------
-
-#define SIMULATION_BLOCK_SIZE 256
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-RWStructuredBuffer<Particle> ParticlesRW : register( u0 );
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-
-RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 );
-StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 );
-
-RWStructuredBuffer<ParticleForces> ParticlesForcesRW : register( u0 );
-StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 );
-
-RWStructuredBuffer<unsigned int> GridRW : register( u0 );
-StructuredBuffer<unsigned int> GridRO : register( t3 );
-
-RWStructuredBuffer<uint2> GridIndicesRW : register( u0 );
-StructuredBuffer<uint2> GridIndicesRO : register( t4 );
-
-
-//--------------------------------------------------------------------------------------
-// Grid Construction
-//--------------------------------------------------------------------------------------
-
-// For simplicity, this sample uses a 16-bit hash based on the grid cell and
-// a 16-bit particle ID to keep track of the particles while sorting
-// This imposes a limitation of 64K particles and 256x256 grid work
-// You could extended the implementation to support large scenarios by using a uint2
-
-float2 GridCalculateCell(float2 position)
-{
- return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255));
-}
-
-unsigned int GridConstuctKey(uint2 xy)
-{
- // Bit pack [-----UNUSED-----][----Y---][----X---]
- // 16-bit 8-bit 8-bit
- return dot(xy.yx, uint2(256, 1));
-}
-
-unsigned int GridConstuctKeyValuePair(uint2 xy, uint value)
-{
- // Bit pack [----Y---][----X---][-----VALUE------]
- // 8-bit 8-bit 16-bit
- return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1));
-}
-
-unsigned int GridGetKey(unsigned int keyvaluepair)
-{
- return (keyvaluepair >> 16);
-}
-
-unsigned int GridGetValue(unsigned int keyvaluepair)
-{
- return (keyvaluepair & 0xFFFF);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 position = ParticlesRO[P_ID].position;
- float2 grid_xy = GridCalculateCell( position );
-
- GridRW[P_ID] = GridConstuctKeyValuePair((uint2)grid_xy, P_ID);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid Indices
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ClearGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- GridIndicesRW[DTid.x] = uint2(0, 0);
-}
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int G_ID = DTid.x; // Grid ID to operate on
- unsigned int G_ID_PREV = (G_ID == 0)? g_iNumParticles : G_ID; G_ID_PREV--;
- unsigned int G_ID_NEXT = G_ID + 1; if (G_ID_NEXT == g_iNumParticles) { G_ID_NEXT = 0; }
-
- unsigned int cell = GridGetKey( GridRO[G_ID] );
- unsigned int cell_prev = GridGetKey( GridRO[G_ID_PREV] );
- unsigned int cell_next = GridGetKey( GridRO[G_ID_NEXT] );
- if (cell != cell_prev)
- {
- // I'm the start of a cell
- GridIndicesRW[cell].x = G_ID;
- }
- if (cell != cell_next)
- {
- // I'm the end of a cell
- GridIndicesRW[cell].y = G_ID + 1;
- }
-}
-
-
-//--------------------------------------------------------------------------------------
-// Rearrange Particles
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void RearrangeParticlesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int ID = DTid.x; // Particle ID to operate on
- const unsigned int G_ID = GridGetValue( GridRO[ ID ] );
- ParticlesRW[ID] = ParticlesRO[ G_ID ];
-}
-
-
-//--------------------------------------------------------------------------------------
-// Density Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculateDensity(float r_sq)
-{
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- // Implements this equation:
- // W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3
- // g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9)
- return g_fDensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on all neighbors
- for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on all neighbors
- [loop]
- for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
- {
- // Cache a tile of particles unto shared memory to increase IO efficiency
- density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position;
-
- GroupMemoryBarrierWithGroupSync();
-
- for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++)
- {
- float2 N_position = density_shared_pos[N_tile_ID];
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
-
- GroupMemoryBarrierWithGroupSync();
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on neighbors from the 8 adjacent cells + current cell
- int2 G_XY = (int2)GridCalculateCell( P_position );
- for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
- {
- for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
- {
- unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
- uint2 G_START_END = GridIndicesRO[G_CELL];
- for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
- }
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Force Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculatePressure(float density)
-{
- // Implements this equation:
- // Pressure = B * ((rho / rho_0)^y - 1)
- return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0);
-}
-
-float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff)
-{
- const float h = g_fSmoothlen;
- float avg_pressure = 0.5f * (N_pressure + P_pressure);
- // Implements this equation:
- // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3
- // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2
- // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6)
- return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff);
-}
-
-float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density)
-{
- const float h = g_fSmoothlen;
- float2 vel_diff = (N_velocity - P_velocity);
- // Implements this equation:
- // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1)
- // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r)
- // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6)
- return g_fLapViscosityCoef / N_density * (h - r) * vel_diff;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on all neighbors
- for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = ParticlesRO[N_ID].velocity;
- float N_density = ParticlesDensityRO[N_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on all neighbors
- [loop]
- for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
- {
- // Cache a tile of particles unto shared memory to increase IO efficiency
- force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position;
- force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity;
- force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density;
-
- GroupMemoryBarrierWithGroupSync();
-
- [loop]
- for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ )
- {
- uint N_ID = N_block_ID + N_tile_ID;
- float2 N_position = force_shared_pos[N_tile_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = force_shared_pos[N_tile_ID].velocity;
- float N_density = force_shared_pos[N_tile_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
-
- GroupMemoryBarrierWithGroupSync();
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on neighbors from the 8 adjacent cells + current cell
- int2 G_XY = (int2)GridCalculateCell( P_position );
- for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
- {
- for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
- {
- unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
- uint2 G_START_END = GridIndicesRO[G_CELL];
- for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = ParticlesRO[N_ID].velocity;
- float N_density = ParticlesDensityRO[N_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
- }
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Integration
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void IntegrateCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 position = ParticlesRO[P_ID].position;
- float2 velocity = ParticlesRO[P_ID].velocity;
- float2 acceleration = ParticlesForcesRO[P_ID].acceleration;
-
- // Apply the forces from the map walls
- [unroll]
- for (unsigned int i = 0 ; i < 4 ; i++)
- {
- float dist = dot(float3(position, 1), g_vPlanes[i]);
- acceleration += min(dist, 0) * -g_fWallStiffness * g_vPlanes[i].xy;
- }
-
- // Apply gravity
- acceleration += g_vGravity.xy;
-
- // Integrate
- velocity += g_fTimeStep * acceleration;
- position += g_fTimeStep * velocity;
-
- // Update
- ParticlesRW[P_ID].position = position;
- ParticlesRW[P_ID].velocity = velocity;
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
deleted file mode 100644
index cfd14c2b2..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
+++ /dev/null
@@ -1,124 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry ParticleVS -stage vertex -entry ParticleGS -stage geometry -entry ParticlePS -stage pixel
-
-// Active only when __SLANG__ is not defined: renames the global resources,
-// constant-buffer members and struct fields used below to `_0`-suffixed names.
-// NOTE(review): presumably this makes the plain-HLSL compile use the same
-// identifiers Slang emits, so the COMPARE_HLSL test outputs match -- confirm.
-#ifndef __SLANG__
-#define ParticlesRO ParticlesRO_0
-#define ParticleDensityRO ParticleDensityRO_0
-#define cbRenderConstants cbRenderConstants_0
-#define g_mViewProjection g_mViewProjection_0
-#define g_fParticleSize g_fParticleSize_0
-#define density density_0
-#define position position_0
-#define velocity velocity_0
-
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: FluidRender.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Particle Rendering
-//--------------------------------------------------------------------------------------
-
-// Element type of the ParticlesRO structured buffer: one particle's
-// 2D position and 2D velocity.
-struct Particle {
- float2 position;
- float2 velocity;
-};
-
-// Element type of the ParticleDensityRO structured buffer: a single scalar
-// density per particle, used by ParticleVS to pick the particle color.
-struct ParticleDensity {
- float density;
-};
-
-// Read-only per-particle inputs, bound to t0 and t1. ParticleVS indexes both
-// buffers with the same vertex ID.
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-StructuredBuffer<ParticleDensity> ParticleDensityRO : register( t1 );
-
-// Render constants bound to b0.
-//   g_mViewProjection - matrix ParticleGS applies to each sprite corner via
-//                       mul(position, g_mViewProjection).
-//   g_fParticleSize   - scale applied to the unit corner offsets in ParticleGS.
-cbuffer cbRenderConstants : register( b0 )
-{
- matrix g_mViewProjection;
- float g_fParticleSize;
-};
-
-// Output of ParticleVS and input of ParticleGS: the particle's untransformed
-// 2D position and its visualization color.
-struct VSParticleOut
-{
- float2 position : POSITION;
- float4 color : COLOR;
-};
-
-// Per-vertex output of ParticleGS and input of ParticlePS: the corner
-// position after the view-projection transform, the particle color, and
-// the corner's texture coordinate.
-struct GSParticleOut
-{
- float4 position : SV_Position;
- float4 color : COLOR;
- float2 texcoord : TEXCOORD;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Visualization Helper
-//--------------------------------------------------------------------------------------
-
-// Five-entry color ramp used by VisualizeNumber, which blends between
-// adjacent entries.
-static const float4 Rainbow[5] = {
- float4(1, 0, 0, 1), // red
- float4(1, 1, 0, 1), // yellow
- float4(0, 1, 0, 1), // green
- float4(0, 1, 1, 1), // cyan
- float4(0, 0, 1, 1), // blue
-};
-
-// Maps a scalar to a color by blending between adjacent Rainbow entries.
-// n is clamped to [0, 1] before use so the computed indices always stay
-// inside the 5-element Rainbow table: 0 yields Rainbow[0], 1 yields
-// Rainbow[4]. Results for inputs already in [0, 1] are unchanged.
-float4 VisualizeNumber(float n)
-{
-    // Scale once: [0, 1] -> [0, 4], the valid index range of Rainbow.
-    const float scaled = saturate(n) * 4.0f;
-    return lerp( Rainbow[ int(floor(scaled)) ], Rainbow[ int(ceil(scaled)) ], frac(scaled) );
-}
-
-// Range-mapped overload: rescales n from [lower, upper] to [0, 1], clamps
-// it, and returns the color chosen by the single-argument overload.
-float4 VisualizeNumber(float n, float lower, float upper)
-{
-    const float span = upper - lower;
-    const float normalized = saturate( (n - lower) / span );
-    return VisualizeNumber( normalized );
-}
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-
-// Fetches the particle selected by the vertex ID, passes its position
-// through untransformed, and colors it by its density mapped over the
-// range [1000, 2000].
-VSParticleOut ParticleVS(uint ID : SV_VERTEXID)
-{
-    const float2 particlePos = ParticlesRO[ID].position;
-    const float particleDensity = ParticleDensityRO[ID].density;
-
-    VSParticleOut result;
-    result.position = particlePos;
-    result.color = VisualizeNumber(particleDensity, 1000.0f, 2000.0f);
-    return result;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Particle Geometry Shader
-//--------------------------------------------------------------------------------------
-
-// Unit corner offsets and matching texture coordinates for the four vertices
-// ParticleGS appends per particle, listed in the order they are emitted.
-static const float2 g_positions[4] = { float2(-1, 1), float2(1, 1), float2(-1, -1), float2(1, -1) };
-static const float2 g_texcoords[4] = { float2(0, 1), float2(1, 1), float2(0, 0), float2(1, 0) };
-
-// Expands one input point into a four-vertex strip: each corner is the
-// particle position offset by g_fParticleSize times a unit corner offset,
-// transformed by g_mViewProjection, and carries the particle color and the
-// corner's texture coordinate.
-[maxvertexcount(4)]
-void ParticleGS(point VSParticleOut In[1], inout TriangleStream<GSParticleOut> SpriteStream)
-{
-    const float4 origin = float4(In[0].position, 0, 1);
-    const float4 tint = In[0].color;
-
-    [unroll]
-    for (int corner = 0; corner < 4; corner++)
-    {
-        const float4 cornerPos = origin + g_fParticleSize * float4(g_positions[corner], 0, 0);
-
-        GSParticleOut sprite;
-        sprite.position = mul(cornerPos, g_mViewProjection);
-        sprite.color = tint;
-        sprite.texcoord = g_texcoords[corner];
-        SpriteStream.Append(sprite);
-    }
-    SpriteStream.RestartStrip();
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-
-// Outputs the interpolated particle color unchanged.
-float4 ParticlePS(GSParticleOut In) : SV_TARGET
-{
-    const float4 shaded = In.color;
-    return shaded;
-}