diff options
| author | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
| commit | 8c4603c73675958efc960fbd4bb599a2909d106a (patch) | |
| tree | 714dc6fc9a1672d5fd7f89676b97e10959662abc /ComputeShaders/convolutionPrep2.hlsl | |
| parent | 990a8d0dbaefc996244097397259e92758b15cce (diff) | |
Source codes
Diffstat (limited to 'ComputeShaders/convolutionPrep2.hlsl')
| -rw-r--r-- | ComputeShaders/convolutionPrep2.hlsl | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/ComputeShaders/convolutionPrep2.hlsl b/ComputeShaders/convolutionPrep2.hlsl
new file mode 100644
index 0000000..a7e7172
--- /dev/null
+++ b/ComputeShaders/convolutionPrep2.hlsl
@@ -0,0 +1,43 @@
+// Compute shader for ggml_compute_forward_conv_1d_1s_f16_f32: prepares the source data (src1)
+// Dispatch [ ne11, 1, 1 ] thread groups; each group processes one src1 row, 32 elements per pass
+// Element i10 of row i11 lands at result[ ( i10 + src0_elements[ 0 ] / 2 ) * roundUp32( src0_elements[ 1 ] ) + i11 ]
+Buffer<float> arg1: register( t0 );
+RWBuffer<float> result: register( u0 );
+
+cbuffer Constants: register( b0 )
+{
+	uint4 src0_elements: packoffset( c0 );
+	uint4 src1_elements: packoffset( c2 );
+	uint4 src1_strides: packoffset( c3 );
+}
+
+#include "miscUtils.hlsli"
+
+[ numthreads( 32, 1, 1 ) ]
+void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
+{
+	// Must stay equal to the X dimension of the numthreads attribute above
+	const uint lanes = 32;
+	const uint row = group.x;
+	const uint halfKernel = src0_elements[ 0 ] / 2;
+	// roundUp32 comes from miscUtils.hlsli; presumably rounds up to a multiple of 32 - TODO confirm
+	const int paddedCount = roundUp32( src0_elements[ 1 ] );
+
+	// Source range of this row; src1_strides[ 1 ] is applied as an element offset between rows
+	const uint rowBegin = row * src1_strides[ 1 ];
+	const uint rowEnd = rowBegin + src1_elements[ 0 ];
+
+	// Every thread starts at its own element, then advances by `lanes` elements per pass
+	uint source = rowBegin + thread;
+	uint dest = halfKernel * paddedCount + row + thread * paddedCount;
+	const uint destStep = paddedCount * lanes;
+
+	while( source < rowEnd )
+	{
+		// adjustFp16 is defined in miscUtils.hlsli; presumably rounds to FP16 precision - TODO confirm
+		float value = arg1[ source ];
+		result[ dest ] = adjustFp16( value );
+		source += lanes;
+		dest += destStep;
+	}
+}
\ No newline at end of file
