diff options
| author | Yong He <yonghe@outlook.com> | 2024-06-13 16:29:58 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-13 16:29:58 -0700 |
| commit | cfef0c6f66c9d36ae2899c8c2790c3fe422a7700 (patch) | |
| tree | 0e116769009ae7e4b3b16d2ef28f275bea5e1fd7 /source/slang | |
| parent | 2407966e899f9e4f490b23a92fc06d5da20544cc (diff) | |
Metal: misc fixes and enable more tests. (#4374)
* Fix and enable tests for metal.
* Fix.
* Fix.
* Fix tests.
* Fix warnings.
* Fix.
---------
Co-authored-by: Yong He <yonghe@Yongs-Mac-mini.local>
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 161 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-hlsl.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.cpp | 66 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.cpp | 13 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 12 |
8 files changed, 166 insertions, 95 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 9a87604ae..10d183da2 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1784,20 +1784,24 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> { switch (Shape.flavor) { + case $(SLANG_TEXTURE_1D): + __intrinsic_asm "$c$0.sample($1, ($2).x, uint(($2).y))$z"; case $(SLANG_TEXTURE_2D): - __intrinsic_asm "$0.sample($1, ($2).xy, uint(($2).z), level($3))"; + __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3))$z"; case $(SLANG_TEXTURE_CUBE): - __intrinsic_asm "$0.sample($1, ($2).xyz, uint(($2).w), level($3))"; + __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3))$z"; } } else { switch (Shape.flavor) { + case $(SLANG_TEXTURE_1D): + __intrinsic_asm "$c$0.sample($1, $2)$z"; case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_3D): case $(SLANG_TEXTURE_CUBE): - __intrinsic_asm "$0.sample($1, $2, level($3))"; + __intrinsic_asm "$c$0.sample($1, $2, level($3))$z"; } } // TODO: This needs to be handled by the capability system @@ -1866,9 +1870,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): - __intrinsic_asm "$0.sample($1, ($2).xy, uint(($2).z), level($3), $4)"; + __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3), $4)$z"; case $(SLANG_TEXTURE_CUBE): - __intrinsic_asm "$0.sample($1, ($2).xyz, uint(($2).w), level($3), $4)"; + __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3), $4)$z"; } } else @@ -1878,7 +1882,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_3D): case $(SLANG_TEXTURE_CUBE): - __intrinsic_asm "$0.sample($1, $2, level($3), $4)"; + __intrinsic_asm "$c$0.sample($1, $2, level($3), $4)$z"; } } __intrinsic_asm "<invalid intrinsic>"; @@ -3883,7 +3887,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint Load(int location) { __target_switch @@ -3899,7 +3903,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2(int location) { __target_switch @@ -3912,7 +3916,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2(int location, int alignment) { __target_switch @@ -3925,7 +3929,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2Aligned(int location) { __target_switch @@ -3941,7 +3945,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3(int location) { __target_switch @@ -3954,7 +3958,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3(int location, int alignment) { __target_switch @@ -3967,7 +3971,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3Aligned(int location) { __target_switch @@ -3983,7 +3987,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4(int location) { __target_switch @@ -3996,7 +4000,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4(int location, int alignment) { __target_switch @@ -4009,7 +4013,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4Aligned(int location) { __target_switch @@ -4025,7 +4029,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T Load<T>(int location) { return __byteAddressBufferLoad<T>(this, location, 0); @@ -4033,7 +4037,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T Load<T>(int location, int alignment) { return __byteAddressBufferLoad<T>(this, location, alignment); @@ -4041,7 +4045,7 @@ struct $(item.name) [__NoSideEffect] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T LoadAligned<T>(int location) { return __byteAddressBufferLoad<T>(this, location, __naturalStrideOf<T>()); @@ -4894,7 +4898,7 @@ ${{{{ } [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store(uint address, uint value) { __target_switch @@ -4907,7 +4911,7 @@ ${{{{ [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2(uint address, uint2 value) { __target_switch @@ -4920,7 +4924,7 @@ ${{{{ [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2(uint address, uint2 value, uint alignment) { __target_switch @@ -4932,7 +4936,7 @@ ${{{{ } [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2Aligned(uint address, uint2 value) { __target_switch @@ -4944,7 +4948,7 @@ ${{{{ } [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store3(uint address, uint3 value) { __target_switch @@ -4957,7 +4961,7 @@ ${{{{ [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store3(uint address, uint3 value, uint alignment) { __target_switch @@ -4994,7 +4998,7 @@ ${{{{ [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store4(uint address, uint4 value, uint alignment) { __target_switch @@ -5006,7 +5010,7 @@ ${{{{ } [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store4Aligned(uint address, uint4 value) { __target_switch @@ -5685,7 +5689,7 @@ double asdouble(uint lowbits, uint highbits) // Reinterpret bits as a float (HLSL SM 4.0) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] float asfloat(int x) { __target_switch @@ -5694,6 +5698,7 @@ float asfloat(int x) case cuda: __intrinsic_asm "$P_asfloat($0)"; case glsl: __intrinsic_asm "intBitsToFloat"; case hlsl: __intrinsic_asm "asfloat"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$float result $x }; @@ -5701,7 +5706,7 @@ float asfloat(int x) } [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] float asfloat(uint x) { __target_switch @@ -5710,6 +5715,7 @@ float asfloat(uint x) case cuda: __intrinsic_asm "$P_asfloat($0)"; case glsl: __intrinsic_asm "uintBitsToFloat"; case hlsl: __intrinsic_asm "asfloat"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$float result $x }; @@ -5718,13 +5724,14 @@ float asfloat(uint x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<float, N> asfloat(vector< int, N> x) { __target_switch { case glsl: __intrinsic_asm "intBitsToFloat"; case hlsl: __intrinsic_asm "asfloat"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<float, N> result $x }; @@ -5735,13 +5742,14 @@ vector<float, N> asfloat(vector< int, N> x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<float,N> asfloat(vector<uint,N> x) { __target_switch { case glsl: __intrinsic_asm "uintBitsToFloat"; case hlsl: __intrinsic_asm "asfloat"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<float,N> result $x }; @@ -5779,21 +5787,18 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x) // No op [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] float asfloat(float x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] vector<float,N> asfloat(vector<float,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] matrix<float,N,M> asfloat(matrix<float,N,M> x) { return x; } @@ -5890,7 +5895,7 @@ vector<T,N> asinh(vector<T,N> x) // Reinterpret bits as an int (HLSL SM 4.0) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] int asint(float x) { __target_switch @@ -5899,6 +5904,7 @@ int asint(float x) case cuda: __intrinsic_asm "$P_asint($0)"; case glsl: __intrinsic_asm "floatBitsToInt"; case hlsl: __intrinsic_asm "asint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$int result $x }; @@ -5906,7 +5912,7 @@ int asint(float x) } [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] int asint(uint x) { __target_switch @@ -5915,6 +5921,7 @@ int asint(uint x) case cuda: __intrinsic_asm "$P_asint($0)"; case glsl: __intrinsic_asm "int($0)"; case hlsl: __intrinsic_asm "asint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$int result $x }; @@ -5923,13 +5930,14 @@ int asint(uint x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<int, N> asint(vector<float, N> x) { __target_switch { case glsl: __intrinsic_asm "floatBitsToInt"; case hlsl: __intrinsic_asm "asint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<int, N> result $x }; @@ -5940,13 +5948,14 @@ vector<int, N> asint(vector<float, N> x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<int, N> asint(vector<uint, N> x) { __target_switch { case glsl: __intrinsic_asm "ivec$N0($0)"; case hlsl: __intrinsic_asm "asint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<int, N> result $x }; @@ -5984,21 +5993,18 @@ matrix<int, N, M> asint(matrix<uint, N, M> x) // No op [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] int asint(int x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector<int,N> asint(vector<int,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix<int,N,M> asint(matrix<int,N,M> x) { return x; } @@ -6030,7 +6036,7 @@ void asuint(double value, out uint lowbits, out uint highbits) // Reinterpret bits as a uint (HLSL SM 4.0) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] uint asuint(float x) { __target_switch @@ -6039,6 +6045,7 @@ uint asuint(float x) case cuda: __intrinsic_asm "$P_asuint($0)"; case glsl: __intrinsic_asm "floatBitsToUint"; case hlsl: __intrinsic_asm "asuint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$uint result $x }; @@ -6046,7 +6053,7 @@ uint asuint(float x) } [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] uint asuint(int x) { __target_switch @@ -6055,6 +6062,7 @@ uint asuint(int x) case cuda: __intrinsic_asm "$P_asuint($0)"; case glsl: __intrinsic_asm "uint($0)"; case hlsl: __intrinsic_asm "asuint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$uint result $x }; @@ -6063,13 +6071,14 @@ uint asuint(int x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<uint,N> asuint(vector<float,N> x) { __target_switch { case glsl: __intrinsic_asm "floatBitsToUint"; case hlsl: __intrinsic_asm "asuint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<uint,N> result $x }; @@ -6080,13 +6089,14 @@ vector<uint,N> asuint(vector<float,N> x) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector<uint, N> asuint(vector<int, N> x) { __target_switch { case glsl: __intrinsic_asm "uvec$N0($0)"; case hlsl: __intrinsic_asm "asuint"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<uint, N> result $x }; @@ -6246,13 +6256,14 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v [__unsafeForceInlineEarly] [__readNone] -[require(cuda_hlsl_spirv, shader5_sm_5_0)] +[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)] int16_t asint16(float16_t value) { __target_switch { case cuda: __intrinsic_asm "__half_as_short"; case hlsl: __intrinsic_asm "asint16"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$int16_t result $value }; @@ -6262,12 +6273,13 @@ int16_t asint16(float16_t value) [__unsafeForceInlineEarly] [__readNone] -[require(cuda_hlsl_spirv, shader5_sm_5_0)] +[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { __target_switch { case hlsl: __intrinsic_asm "asint16"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; default: return asuint16(value); } } @@ -6286,13 +6298,14 @@ matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> valu [__readNone] [__unsafeForceInlineEarly] -[require(cuda_hlsl_spirv, shader5_sm_5_0)] +[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)] float16_t asfloat16(int16_t value) { __target_switch { case cuda: __intrinsic_asm "__short_as_half"; case hlsl: __intrinsic_asm "asfloat16"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$float16_t result $value }; @@ -6302,12 +6315,13 @@ float16_t asfloat16(int16_t value) [__unsafeForceInlineEarly] [__readNone] -[require(cuda_hlsl_spirv, shader5_sm_5_0)] +[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { __target_switch { case hlsl: __intrinsic_asm "asfloat16"; + case metal: __intrinsic_asm "as_type<$TR>($0)"; case spirv: return spirv_asm { OpBitcast $$vector<float16_t,N> result $value }; @@ -7687,7 +7701,7 @@ vector<T,N> exp10(vector<T,N> x) __glsl_version(420) __cuda_sm_version(6.0) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] float f16tof32(uint value) { __target_switch @@ -7696,6 +7710,7 @@ float f16tof32(uint value) case hlsl: __intrinsic_asm "f16tof32($0)"; case cuda: __intrinsic_asm "__half2float(__ushort_as_half($0))"; case cpp: __intrinsic_asm "f16tof32($0)"; + case metal: __intrinsic_asm "as_type<half>((ushort)($0))"; case spirv: { return spirv_asm { @@ -7709,7 +7724,7 @@ float f16tof32(uint value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<float, N> f16tof32(vector<uint, N> value) { __target_switch @@ -7734,7 +7749,7 @@ vector<float, N> f16tof32(vector<uint, N> value) __glsl_version(420) __cuda_sm_version(6.0) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] uint f32tof16(float value) { __target_switch @@ -7743,6 +7758,7 @@ uint f32tof16(float value) case hlsl: __intrinsic_asm "f32tof16($0)"; case cuda: __intrinsic_asm "__half_as_ushort(__float2half($0))"; case cpp: __intrinsic_asm "f32tof16($0)"; + case metal: __intrinsic_asm "as_type<ushort>((half)($0))"; case spirv: { return spirv_asm { @@ -7756,7 +7772,7 @@ uint f32tof16(float value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<uint, N> f32tof16(vector<float, N> value) { __target_switch @@ -7782,7 +7798,7 @@ vector<uint, N> f32tof16(vector<float, N> value) __glsl_version(420) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] float f16tof32(float16_t value) { __target_switch @@ -7791,6 +7807,7 @@ float f16tof32(float16_t value) case hlsl: __intrinsic_asm "f16tof32($0)"; case cuda: __intrinsic_asm "__half2float($0)"; case cpp: __intrinsic_asm "f16tof32($0)"; + case metal: __intrinsic_asm "float($0)"; case spirv: { return spirv_asm { @@ -7802,13 +7819,14 @@ float f16tof32(float16_t value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<float, N> f16tof32(vector<float16_t, N> value) { __target_switch { case cuda: __intrinsic_asm "__half2float"; case hlsl: __intrinsic_asm "f16tof32"; + case metal: __intrinsic_asm "$TR($0)"; case spirv: return spirv_asm { OpFConvert $$vector<float, N> result $value }; @@ -7820,13 +7838,14 @@ vector<float, N> f16tof32(vector<float16_t, N> value) // Convert to float16_t __glsl_version(420) [__readNone] -[require(cuda_glsl_spirv, shader5_sm_5_0)] +[require(cuda_glsl_metal_spirv, shader5_sm_5_0)] float16_t f32tof16_(float value) { __target_switch { case cuda: __intrinsic_asm "__float2half"; case glsl: __intrinsic_asm "packHalf2x16(vec2($0,0.0))"; + case metal: __intrinsic_asm "half($0)"; case spirv: return spirv_asm { OpFConvert $$float16_t result $value }; @@ -7835,12 +7854,13 @@ float16_t f32tof16_(float value) __generic<let N : int> [__readNone] -[require(cuda_glsl_spirv, shader5_sm_5_0)] +[require(cuda_glsl_metal_spirv, shader5_sm_5_0)] vector<float16_t, N> f32tof16_(vector<float, N> value) { __target_switch { case cuda: __intrinsic_asm "__float2half"; + case metal: __intrinsic_asm "$TR($0)"; case spirv: return spirv_asm { OpFConvert $$vector<float16_t, N> result $value }; @@ -7854,13 +7874,14 @@ vector<float16_t, N> f32tof16_(vector<float, N> value) // Flip surface normal to face forward, if needed __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng) { __target_switch { case glsl: __intrinsic_asm "faceforward"; case hlsl: __intrinsic_asm "faceforward"; + case metal: __intrinsic_asm "faceforward"; case spirv: return spirv_asm { OpExtInst $$vector<T,N> result glsl450 FaceForward $n $i $ng }; @@ -7871,7 +7892,7 @@ vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng) // Find first set bit starting at high bit and working down [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] int firstbithigh(int value) { __target_switch @@ -7880,6 +7901,7 @@ int firstbithigh(int value) case cuda: __intrinsic_asm "$P_firstbithigh($0)"; case glsl: __intrinsic_asm "findMSB"; case hlsl: __intrinsic_asm "firstbithigh"; + case metal: __intrinsic_asm "clz"; case spirv: return spirv_asm { OpExtInst $$int result glsl450 FindSMsb $value }; @@ -7888,13 +7910,14 @@ int firstbithigh(int value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<int, N> firstbithigh(vector<int, N> value) { __target_switch { case glsl: __intrinsic_asm "findMSB"; case hlsl: __intrinsic_asm "firstbithigh"; + case metal: __intrinsic_asm "clz"; case spirv: return spirv_asm { OpExtInst $$vector<int, N> result glsl450 FindSMsb $value }; @@ -7904,7 +7927,7 @@ vector<int, N> firstbithigh(vector<int, N> value) } [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] uint firstbithigh(uint value) { __target_switch @@ -7913,6 +7936,7 @@ uint firstbithigh(uint value) case cuda: __intrinsic_asm "$P_firstbithigh($0)"; case glsl: __intrinsic_asm "findMSB"; case hlsl: __intrinsic_asm "firstbithigh"; + case metal: __intrinsic_asm "clz"; case spirv: return spirv_asm { OpExtInst $$uint result glsl450 FindUMsb $value }; @@ -7921,13 +7945,14 @@ uint firstbithigh(uint value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<uint,N> firstbithigh(vector<uint,N> value) { __target_switch { case glsl: __intrinsic_asm "findMSB"; case hlsl: __intrinsic_asm "firstbithigh"; + case metal: __intrinsic_asm "clz"; case spirv: return spirv_asm { OpExtInst $$vector<uint,N> result glsl450 FindUMsb $value }; @@ -7938,7 +7963,7 @@ vector<uint,N> firstbithigh(vector<uint,N> value) // Find first set bit starting at low bit and working up [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] int firstbitlow(int value) { __target_switch @@ -7947,6 +7972,7 @@ int firstbitlow(int value) case cuda: __intrinsic_asm "$P_firstbitlow($0)"; case glsl: __intrinsic_asm "findLSB"; case hlsl: __intrinsic_asm "firstbitlow"; + case metal: __intrinsic_asm "ctz"; case spirv: return spirv_asm { OpExtInst $$int result glsl450 FindILsb $value }; @@ -7955,13 +7981,14 @@ int firstbitlow(int value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<int,N> firstbitlow(vector<int,N> value) { __target_switch { case glsl: __intrinsic_asm "findLSB"; case hlsl: __intrinsic_asm "firstbitlow"; + case metal: __intrinsic_asm "ctz"; case spirv: return spirv_asm { OpExtInst $$vector<int,N> result glsl450 FindILsb $value }; @@ -7971,7 +7998,7 @@ vector<int,N> firstbitlow(vector<int,N> value) } [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] uint firstbitlow(uint value) { __target_switch @@ -7980,6 +8007,7 @@ uint firstbitlow(uint value) case cuda: __intrinsic_asm "$P_firstbitlow($0)"; case glsl: __intrinsic_asm "findLSB"; case hlsl: __intrinsic_asm "firstbitlow"; + case metal: __intrinsic_asm "ctz"; case spirv: return spirv_asm { OpExtInst $$uint result glsl450 FindILsb $value }; @@ -7988,13 +8016,14 @@ uint firstbitlow(uint value) __generic<let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<uint,N> firstbitlow(vector<uint,N> value) { __target_switch { case glsl: __intrinsic_asm "findLSB"; case hlsl: __intrinsic_asm "firstbitlow"; + case metal: __intrinsic_asm "ctz"; case spirv: return spirv_asm { OpExtInst $$vector<uint,N> result glsl450 FindILsb $value }; diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 53679be35..d003e4f41 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -149,9 +149,11 @@ alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv; alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv; alias cpp_hlsl = cpp | hlsl; alias cuda_glsl_hlsl = cuda | glsl | hlsl; +alias cuda_hlsl_metal_spirv = cuda | hlsl | metal | spirv; alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv; alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv; alias cuda_glsl_spirv = cuda | glsl | spirv; +alias cuda_glsl_metal_spirv = cuda | glsl | metal | spirv; alias cuda_hlsl = cuda | hlsl; alias cuda_hlsl_spirv = cuda | hlsl | spirv; alias glsl_hlsl_spirv = glsl | hlsl | spirv; diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index adca798e6..72b2a08b0 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -714,7 +714,7 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu m_writer->emit("("); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(", "); - emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + emitOperand(inst->getOperand(inst->getOperandCount() - 1), getInfo(EmitOp::General)); m_writer->emit(")"); maybeCloseParens(needClose); diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 794a31e41..07d5b9f6c 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -143,24 +143,36 @@ void MetalSourceEmitter::emitFuncParamLayoutImpl(IRInst* param) auto layout = as<IRVarLayout>(layoutDecoration->getLayout()); if (!layout) return; + for (auto rr : layout->getOffsetAttrs()) { switch (rr->getResourceKind()) { case LayoutResourceKind::MetalTexture: - m_writer->emit(" [[texture("); - m_writer->emit(rr->getOffset()); - m_writer->emit(")]]"); + if (as<IRTextureTypeBase>(param->getDataType()) || as<IRTextureBufferType>(param->getDataType())) + { + m_writer->emit(" [[texture("); + m_writer->emit(rr->getOffset()); + m_writer->emit(")]]"); + } break; case LayoutResourceKind::MetalBuffer: - m_writer->emit(" [[buffer("); - m_writer->emit(rr->getOffset()); - m_writer->emit(")]]"); + if (as<IRPtrTypeBase>(param->getDataType()) || as<IRHLSLStructuredBufferTypeBase>(param->getDataType()) || + as<IRByteAddressBufferTypeBase>(param->getDataType()) || + as<IRUniformParameterGroupType>(param->getDataType())) + { + m_writer->emit(" [[buffer("); + m_writer->emit(rr->getOffset()); + m_writer->emit(")]]"); + } break; case LayoutResourceKind::SamplerState: - m_writer->emit(" [[sampler("); - m_writer->emit(rr->getOffset()); - m_writer->emit(")]]"); + if (as<IRSamplerStateTypeBase>(param->getDataType())) + { + m_writer->emit(" [[sampler("); + m_writer->emit(rr->getOffset()); + m_writer->emit(")]]"); + } break; case LayoutResourceKind::VaryingInput: m_writer->emit(" [[stage_in]]"); @@ -314,6 +326,15 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO } break; } + case kIROp_FRem: + { + m_writer->emit("fmod("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(", "); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(")"); + return true; + } case kIROp_Select: { m_writer->emit("select("); @@ -374,7 +395,7 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO m_writer->emit("[("); emitOperand(offset, getInfo(EmitOp::General)); m_writer->emit(")>>2] = as_type<uint32_t>("); - emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + emitOperand(inst->getOperand(inst->getOperandCount() - 1), getInfo(EmitOp::General)); m_writer->emit(")"); return true; } @@ -587,9 +608,9 @@ void MetalSourceEmitter::emitSimpleTypeImpl(IRType* type) m_writer->emit("matrix<"); emitType(matType->getElementType()); m_writer->emit(","); - emitVal(matType->getColumnCount(), getInfo(EmitOp::General)); - m_writer->emit(","); emitVal(matType->getRowCount(), getInfo(EmitOp::General)); + m_writer->emit(","); + emitVal(matType->getColumnCount(), getInfo(EmitOp::General)); m_writer->emit("> "); return; } @@ -760,7 +781,7 @@ bool MetalSourceEmitter::maybeEmitSystemSemantic(IRInst* inst) return false; } -void MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex) +bool MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex) { if (!semanticName.startsWithCaseInsensitive(toSlice("SV_"))) { @@ -772,7 +793,9 @@ void MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIn m_writer->emit(semanticIndex); } m_writer->emit(")]]"); + return true; } + return false; } void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets) @@ -785,8 +808,10 @@ void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets) if (maybeEmitSystemSemantic(inst)) return; - bool hasSemanticFromLayout = false; - if (auto varLayout = findVarLayout(inst)) + auto varLayout = findVarLayout(inst); + bool hasSemantic = false; + + if (varLayout) { for (auto attr : varLayout->getAllAttrs()) { @@ -797,18 +822,21 @@ void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets) m_writer->emit(" [[attribute("); m_writer->emit(offsetAttr->getOffset()); m_writer->emit(")]]"); + return; } } - else if (auto semanticAttr = as<IRSemanticAttr>(attr)) + } + for (auto attr : varLayout->getAllAttrs()) + { + if (auto semanticAttr = as<IRSemanticAttr>(attr)) { auto semanticName = String(semanticAttr->getName()).toUpper(); - _emitUserSemantic(semanticAttr->getName(), semanticAttr->getIndex()); - hasSemanticFromLayout = true; + hasSemantic = _emitUserSemantic(semanticAttr->getName(), semanticAttr->getIndex()); } } } - if (!hasSemanticFromLayout) + if (!hasSemantic) { if (auto semanticDecor = inst->findDecoration<IRSemanticDecoration>()) { diff --git a/source/slang/slang-emit-metal.h b/source/slang/slang-emit-metal.h index 8b014d604..32557bf27 100644 --- a/source/slang/slang-emit-metal.h +++ b/source/slang/slang-emit-metal.h @@ -76,7 +76,7 @@ protected: void _emitHLSLDecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val); void _emitStageAccessSemantic(IRStageAccessDecoration* decoration, const char* name); - void _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex); + bool _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex); bool maybeEmitSystemSemantic(IRInst* inst); }; diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp index 38bee566c..dba3ab5f5 100644 --- a/source/slang/slang-ir-byte-address-legalize.cpp +++ b/source/slang/slang-ir-byte-address-legalize.cpp @@ -1168,10 +1168,9 @@ struct ByteAddressBufferLegalizationContext uint64Val, m_builder.getIntValue(m_builder.getUInt64Type(), 32))); auto loOffset = offset; auto hiOffset = emitOffsetAddIfNeeded(offset, 4); - IRInst* storeLoArgs[] = { buffer, loOffset, loVal }; - IRInst* storeHiArgs[] = { buffer, hiOffset, hiVal }; - m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeLoArgs); - m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeHiArgs); + IRInst* alignment = m_builder.getIntValue(m_builder.getUIntType(), 0); + m_builder.emitByteAddressBufferStore(buffer, loOffset, alignment, loVal); + m_builder.emitByteAddressBufferStore(buffer, hiOffset, alignment, hiVal); return SLANG_OK; } else if (sizeAlignment.size < 4) @@ -1202,14 +1201,12 @@ struct ByteAddressBufferLegalizationContext mask = m_builder.emitBitNot(m_builder.getUIntType(), mask); auto maskedData = m_builder.emitBitAnd(m_builder.getUIntType(), existingVal, mask); auto newData = m_builder.emitBitOr(m_builder.getUIntType(), maskedData, shiftedData); - IRInst* storeArgs[] = { buffer, alignedOffset, newData }; - m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs); + m_builder.emitByteAddressBufferStore(buffer, alignedOffset, newData); return SLANG_OK; } } { - IRInst* storeArgs[] = { buffer, offset, value }; - m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs); + m_builder.emitByteAddressBufferStore(buffer, offset, value); return SLANG_OK; } } diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index bc86fa7ee..4781ea2c3 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -3926,6 +3926,9 @@ public: IRInst* emitOutImplicitCast(IRInst* type, IRInst* value); IRInst* emitInOutImplicitCast(IRInst* type, IRInst* value); + IRInst* emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* value); + IRInst* emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* alignment, IRInst* value); + IRFunc* createFunc(); IRGlobalVar* createGlobalVar( IRType* valueType); diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index b6d000d20..22ef4e6be 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -3213,6 +3213,18 @@ namespace Slang return inst; } + IRInst* IRBuilder::emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* value) + { + IRInst* args[] = { byteAddressBuffer, offset, getIntValue(getUIntType(), 0), value}; + return emitIntrinsicInst(getVoidType(), kIROp_ByteAddressBufferStore, 4, args); + } + + IRInst* IRBuilder::emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* alignment, IRInst* value) + { + IRInst* args[] = { byteAddressBuffer, offset, alignment, value }; + return emitIntrinsicInst(getVoidType(), kIROp_ByteAddressBufferStore, 4, args); + } + IRInst* IRBuilder::emitReinterpret(IRInst* type, IRInst* value) { return emitIntrinsicInst((IRType*)type, kIROp_Reinterpret, 1, &value); |
