summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author: Yong He <yonghe@outlook.com> 2024-06-13 16:29:58 -0700
committer: GitHub <noreply@github.com> 2024-06-13 16:29:58 -0700
commit: cfef0c6f66c9d36ae2899c8c2790c3fe422a7700 (patch)
tree: 0e116769009ae7e4b3b16d2ef28f275bea5e1fd7 /source
parent: 2407966e899f9e4f490b23a92fc06d5da20544cc (diff)
Metal: misc fixes and enable more tests. (#4374)
* Fix and enable tests for metal.
* Fix.
* Fix.
* Fix tests.
* Fix warnings.
* Fix.

---------

Co-authored-by: Yong He <yonghe@Yongs-Mac-mini.local>
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang | 161
-rw-r--r-- source/slang/slang-capabilities.capdef | 2
-rw-r--r-- source/slang/slang-emit-hlsl.cpp | 2
-rw-r--r-- source/slang/slang-emit-metal.cpp | 66
-rw-r--r-- source/slang/slang-emit-metal.h | 2
-rw-r--r-- source/slang/slang-ir-byte-address-legalize.cpp | 13
-rw-r--r-- source/slang/slang-ir-insts.h | 3
-rw-r--r-- source/slang/slang-ir.cpp | 12
8 files changed, 166 insertions, 95 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 9a87604ae..10d183da2 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1784,20 +1784,24 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
{
switch (Shape.flavor)
{
+ case $(SLANG_TEXTURE_1D):
+ __intrinsic_asm "$c$0.sample($1, ($2).x, uint(($2).y))$z";
case $(SLANG_TEXTURE_2D):
- __intrinsic_asm "$0.sample($1, ($2).xy, uint(($2).z), level($3))";
+ __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3))$z";
case $(SLANG_TEXTURE_CUBE):
- __intrinsic_asm "$0.sample($1, ($2).xyz, uint(($2).w), level($3))";
+ __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3))$z";
}
}
else
{
switch (Shape.flavor)
{
+ case $(SLANG_TEXTURE_1D):
+ __intrinsic_asm "$c$0.sample($1, $2)$z";
case $(SLANG_TEXTURE_2D):
case $(SLANG_TEXTURE_3D):
case $(SLANG_TEXTURE_CUBE):
- __intrinsic_asm "$0.sample($1, $2, level($3))";
+ __intrinsic_asm "$c$0.sample($1, $2, level($3))$z";
}
}
// TODO: This needs to be handled by the capability system
@@ -1866,9 +1870,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
switch (Shape.flavor)
{
case $(SLANG_TEXTURE_2D):
- __intrinsic_asm "$0.sample($1, ($2).xy, uint(($2).z), level($3), $4)";
+ __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3), $4)$z";
case $(SLANG_TEXTURE_CUBE):
- __intrinsic_asm "$0.sample($1, ($2).xyz, uint(($2).w), level($3), $4)";
+ __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3), $4)$z";
}
}
else
@@ -1878,7 +1882,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
case $(SLANG_TEXTURE_2D):
case $(SLANG_TEXTURE_3D):
case $(SLANG_TEXTURE_CUBE):
- __intrinsic_asm "$0.sample($1, $2, level($3), $4)";
+ __intrinsic_asm "$c$0.sample($1, $2, level($3), $4)$z";
}
}
__intrinsic_asm "<invalid intrinsic>";
@@ -3883,7 +3887,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint Load(int location)
{
__target_switch
@@ -3899,7 +3903,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint2 Load2(int location)
{
__target_switch
@@ -3912,7 +3916,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint2 Load2(int location, int alignment)
{
__target_switch
@@ -3925,7 +3929,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint2 Load2Aligned(int location)
{
__target_switch
@@ -3941,7 +3945,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint3 Load3(int location)
{
__target_switch
@@ -3954,7 +3958,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint3 Load3(int location, int alignment)
{
__target_switch
@@ -3967,7 +3971,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint3 Load3Aligned(int location)
{
__target_switch
@@ -3983,7 +3987,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint4 Load4(int location)
{
__target_switch
@@ -3996,7 +4000,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint4 Load4(int location, int alignment)
{
__target_switch
@@ -4009,7 +4013,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
uint4 Load4Aligned(int location)
{
__target_switch
@@ -4025,7 +4029,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
T Load<T>(int location)
{
return __byteAddressBufferLoad<T>(this, location, 0);
@@ -4033,7 +4037,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
T Load<T>(int location, int alignment)
{
return __byteAddressBufferLoad<T>(this, location, alignment);
@@ -4041,7 +4045,7 @@ struct $(item.name)
[__NoSideEffect]
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
T LoadAligned<T>(int location)
{
return __byteAddressBufferLoad<T>(this, location, __naturalStrideOf<T>());
@@ -4894,7 +4898,7 @@ ${{{{
}
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store(uint address, uint value)
{
__target_switch
@@ -4907,7 +4911,7 @@ ${{{{
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store2(uint address, uint2 value)
{
__target_switch
@@ -4920,7 +4924,7 @@ ${{{{
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store2(uint address, uint2 value, uint alignment)
{
__target_switch
@@ -4932,7 +4936,7 @@ ${{{{
}
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store2Aligned(uint address, uint2 value)
{
__target_switch
@@ -4944,7 +4948,7 @@ ${{{{
}
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store3(uint address, uint3 value)
{
__target_switch
@@ -4957,7 +4961,7 @@ ${{{{
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store3(uint address, uint3 value, uint alignment)
{
__target_switch
@@ -4994,7 +4998,7 @@ ${{{{
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4(uint address, uint4 value, uint alignment)
{
__target_switch
@@ -5006,7 +5010,7 @@ ${{{{
}
[ForceInline]
- [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4Aligned(uint address, uint4 value)
{
__target_switch
@@ -5685,7 +5689,7 @@ double asdouble(uint lowbits, uint highbits)
// Reinterpret bits as a float (HLSL SM 4.0)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
float asfloat(int x)
{
__target_switch
@@ -5694,6 +5698,7 @@ float asfloat(int x)
case cuda: __intrinsic_asm "$P_asfloat($0)";
case glsl: __intrinsic_asm "intBitsToFloat";
case hlsl: __intrinsic_asm "asfloat";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$float result $x
};
@@ -5701,7 +5706,7 @@ float asfloat(int x)
}
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
float asfloat(uint x)
{
__target_switch
@@ -5710,6 +5715,7 @@ float asfloat(uint x)
case cuda: __intrinsic_asm "$P_asfloat($0)";
case glsl: __intrinsic_asm "uintBitsToFloat";
case hlsl: __intrinsic_asm "asfloat";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$float result $x
};
@@ -5718,13 +5724,14 @@ float asfloat(uint x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<float, N> asfloat(vector< int, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "intBitsToFloat";
case hlsl: __intrinsic_asm "asfloat";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<float, N> result $x
};
@@ -5735,13 +5742,14 @@ vector<float, N> asfloat(vector< int, N> x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<float,N> asfloat(vector<uint,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "uintBitsToFloat";
case hlsl: __intrinsic_asm "asfloat";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<float,N> result $x
};
@@ -5779,21 +5787,18 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x)
// No op
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
float asfloat(float x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
vector<float,N> asfloat(vector<float,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
matrix<float,N,M> asfloat(matrix<float,N,M> x)
{ return x; }
@@ -5890,7 +5895,7 @@ vector<T,N> asinh(vector<T,N> x)
// Reinterpret bits as an int (HLSL SM 4.0)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
int asint(float x)
{
__target_switch
@@ -5899,6 +5904,7 @@ int asint(float x)
case cuda: __intrinsic_asm "$P_asint($0)";
case glsl: __intrinsic_asm "floatBitsToInt";
case hlsl: __intrinsic_asm "asint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$int result $x
};
@@ -5906,7 +5912,7 @@ int asint(float x)
}
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
int asint(uint x)
{
__target_switch
@@ -5915,6 +5921,7 @@ int asint(uint x)
case cuda: __intrinsic_asm "$P_asint($0)";
case glsl: __intrinsic_asm "int($0)";
case hlsl: __intrinsic_asm "asint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$int result $x
};
@@ -5923,13 +5930,14 @@ int asint(uint x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<int, N> asint(vector<float, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "floatBitsToInt";
case hlsl: __intrinsic_asm "asint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<int, N> result $x
};
@@ -5940,13 +5948,14 @@ vector<int, N> asint(vector<float, N> x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<int, N> asint(vector<uint, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "ivec$N0($0)";
case hlsl: __intrinsic_asm "asint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<int, N> result $x
};
@@ -5984,21 +5993,18 @@ matrix<int, N, M> asint(matrix<uint, N, M> x)
// No op
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
int asint(int x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
vector<int,N> asint(vector<int,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix<int,N,M> asint(matrix<int,N,M> x)
{ return x; }
@@ -6030,7 +6036,7 @@ void asuint(double value, out uint lowbits, out uint highbits)
// Reinterpret bits as a uint (HLSL SM 4.0)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
uint asuint(float x)
{
__target_switch
@@ -6039,6 +6045,7 @@ uint asuint(float x)
case cuda: __intrinsic_asm "$P_asuint($0)";
case glsl: __intrinsic_asm "floatBitsToUint";
case hlsl: __intrinsic_asm "asuint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$uint result $x
};
@@ -6046,7 +6053,7 @@ uint asuint(float x)
}
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
uint asuint(int x)
{
__target_switch
@@ -6055,6 +6062,7 @@ uint asuint(int x)
case cuda: __intrinsic_asm "$P_asuint($0)";
case glsl: __intrinsic_asm "uint($0)";
case hlsl: __intrinsic_asm "asuint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$uint result $x
};
@@ -6063,13 +6071,14 @@ uint asuint(int x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<uint,N> asuint(vector<float,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "floatBitsToUint";
case hlsl: __intrinsic_asm "asuint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<uint,N> result $x
};
@@ -6080,13 +6089,14 @@ vector<uint,N> asuint(vector<float,N> x)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector<uint, N> asuint(vector<int, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "uvec$N0($0)";
case hlsl: __intrinsic_asm "asuint";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<uint, N> result $x
};
@@ -6246,13 +6256,14 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v
[__unsafeForceInlineEarly]
[__readNone]
-[require(cuda_hlsl_spirv, shader5_sm_5_0)]
+[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
int16_t asint16(float16_t value)
{
__target_switch
{
case cuda: __intrinsic_asm "__half_as_short";
case hlsl: __intrinsic_asm "asint16";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$int16_t result $value
};
@@ -6262,12 +6273,13 @@ int16_t asint16(float16_t value)
[__unsafeForceInlineEarly]
[__readNone]
-[require(cuda_hlsl_spirv, shader5_sm_5_0)]
+[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value)
{
__target_switch
{
case hlsl: __intrinsic_asm "asint16";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
default: return asuint16(value);
}
}
@@ -6286,13 +6298,14 @@ matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> valu
[__readNone]
[__unsafeForceInlineEarly]
-[require(cuda_hlsl_spirv, shader5_sm_5_0)]
+[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
float16_t asfloat16(int16_t value)
{
__target_switch
{
case cuda: __intrinsic_asm "__short_as_half";
case hlsl: __intrinsic_asm "asfloat16";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$float16_t result $value
};
@@ -6302,12 +6315,13 @@ float16_t asfloat16(int16_t value)
[__unsafeForceInlineEarly]
[__readNone]
-[require(cuda_hlsl_spirv, shader5_sm_5_0)]
+[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value)
{
__target_switch
{
case hlsl: __intrinsic_asm "asfloat16";
+ case metal: __intrinsic_asm "as_type<$TR>($0)";
case spirv: return spirv_asm {
OpBitcast $$vector<float16_t,N> result $value
};
@@ -7687,7 +7701,7 @@ vector<T,N> exp10(vector<T,N> x)
__glsl_version(420)
__cuda_sm_version(6.0)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
float f16tof32(uint value)
{
__target_switch
@@ -7696,6 +7710,7 @@ float f16tof32(uint value)
case hlsl: __intrinsic_asm "f16tof32($0)";
case cuda: __intrinsic_asm "__half2float(__ushort_as_half($0))";
case cpp: __intrinsic_asm "f16tof32($0)";
+ case metal: __intrinsic_asm "as_type<half>((ushort)($0))";
case spirv:
{
return spirv_asm {
@@ -7709,7 +7724,7 @@ float f16tof32(uint value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<float, N> f16tof32(vector<uint, N> value)
{
__target_switch
@@ -7734,7 +7749,7 @@ vector<float, N> f16tof32(vector<uint, N> value)
__glsl_version(420)
__cuda_sm_version(6.0)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint f32tof16(float value)
{
__target_switch
@@ -7743,6 +7758,7 @@ uint f32tof16(float value)
case hlsl: __intrinsic_asm "f32tof16($0)";
case cuda: __intrinsic_asm "__half_as_ushort(__float2half($0))";
case cpp: __intrinsic_asm "f32tof16($0)";
+ case metal: __intrinsic_asm "as_type<ushort>((half)($0))";
case spirv:
{
return spirv_asm {
@@ -7756,7 +7772,7 @@ uint f32tof16(float value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<uint, N> f32tof16(vector<float, N> value)
{
__target_switch
@@ -7782,7 +7798,7 @@ vector<uint, N> f32tof16(vector<float, N> value)
__glsl_version(420)
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
float f16tof32(float16_t value)
{
__target_switch
@@ -7791,6 +7807,7 @@ float f16tof32(float16_t value)
case hlsl: __intrinsic_asm "f16tof32($0)";
case cuda: __intrinsic_asm "__half2float($0)";
case cpp: __intrinsic_asm "f16tof32($0)";
+ case metal: __intrinsic_asm "float($0)";
case spirv:
{
return spirv_asm {
@@ -7802,13 +7819,14 @@ float f16tof32(float16_t value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<float, N> f16tof32(vector<float16_t, N> value)
{
__target_switch
{
case cuda: __intrinsic_asm "__half2float";
case hlsl: __intrinsic_asm "f16tof32";
+ case metal: __intrinsic_asm "$TR($0)";
case spirv: return spirv_asm {
OpFConvert $$vector<float, N> result $value
};
@@ -7820,13 +7838,14 @@ vector<float, N> f16tof32(vector<float16_t, N> value)
// Convert to float16_t
__glsl_version(420)
[__readNone]
-[require(cuda_glsl_spirv, shader5_sm_5_0)]
+[require(cuda_glsl_metal_spirv, shader5_sm_5_0)]
float16_t f32tof16_(float value)
{
__target_switch
{
case cuda: __intrinsic_asm "__float2half";
case glsl: __intrinsic_asm "packHalf2x16(vec2($0,0.0))";
+ case metal: __intrinsic_asm "half($0)";
case spirv: return spirv_asm {
OpFConvert $$float16_t result $value
};
@@ -7835,12 +7854,13 @@ float16_t f32tof16_(float value)
__generic<let N : int>
[__readNone]
-[require(cuda_glsl_spirv, shader5_sm_5_0)]
+[require(cuda_glsl_metal_spirv, shader5_sm_5_0)]
vector<float16_t, N> f32tof16_(vector<float, N> value)
{
__target_switch
{
case cuda: __intrinsic_asm "__float2half";
+ case metal: __intrinsic_asm "$TR($0)";
case spirv: return spirv_asm {
OpFConvert $$vector<float16_t, N> result $value
};
@@ -7854,13 +7874,14 @@ vector<float16_t, N> f32tof16_(vector<float, N> value)
// Flip surface normal to face forward, if needed
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
{
__target_switch
{
case glsl: __intrinsic_asm "faceforward";
case hlsl: __intrinsic_asm "faceforward";
+ case metal: __intrinsic_asm "faceforward";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 FaceForward $n $i $ng
};
@@ -7871,7 +7892,7 @@ vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
// Find first set bit starting at high bit and working down
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
int firstbithigh(int value)
{
__target_switch
@@ -7880,6 +7901,7 @@ int firstbithigh(int value)
case cuda: __intrinsic_asm "$P_firstbithigh($0)";
case glsl: __intrinsic_asm "findMSB";
case hlsl: __intrinsic_asm "firstbithigh";
+ case metal: __intrinsic_asm "clz";
case spirv: return spirv_asm {
OpExtInst $$int result glsl450 FindSMsb $value
};
@@ -7888,13 +7910,14 @@ int firstbithigh(int value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<int, N> firstbithigh(vector<int, N> value)
{
__target_switch
{
case glsl: __intrinsic_asm "findMSB";
case hlsl: __intrinsic_asm "firstbithigh";
+ case metal: __intrinsic_asm "clz";
case spirv: return spirv_asm {
OpExtInst $$vector<int, N> result glsl450 FindSMsb $value
};
@@ -7904,7 +7927,7 @@ vector<int, N> firstbithigh(vector<int, N> value)
}
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint firstbithigh(uint value)
{
__target_switch
@@ -7913,6 +7936,7 @@ uint firstbithigh(uint value)
case cuda: __intrinsic_asm "$P_firstbithigh($0)";
case glsl: __intrinsic_asm "findMSB";
case hlsl: __intrinsic_asm "firstbithigh";
+ case metal: __intrinsic_asm "clz";
case spirv: return spirv_asm {
OpExtInst $$uint result glsl450 FindUMsb $value
};
@@ -7921,13 +7945,14 @@ uint firstbithigh(uint value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<uint,N> firstbithigh(vector<uint,N> value)
{
__target_switch
{
case glsl: __intrinsic_asm "findMSB";
case hlsl: __intrinsic_asm "firstbithigh";
+ case metal: __intrinsic_asm "clz";
case spirv: return spirv_asm {
OpExtInst $$vector<uint,N> result glsl450 FindUMsb $value
};
@@ -7938,7 +7963,7 @@ vector<uint,N> firstbithigh(vector<uint,N> value)
// Find first set bit starting at low bit and working up
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
int firstbitlow(int value)
{
__target_switch
@@ -7947,6 +7972,7 @@ int firstbitlow(int value)
case cuda: __intrinsic_asm "$P_firstbitlow($0)";
case glsl: __intrinsic_asm "findLSB";
case hlsl: __intrinsic_asm "firstbitlow";
+ case metal: __intrinsic_asm "ctz";
case spirv: return spirv_asm {
OpExtInst $$int result glsl450 FindILsb $value
};
@@ -7955,13 +7981,14 @@ int firstbitlow(int value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<int,N> firstbitlow(vector<int,N> value)
{
__target_switch
{
case glsl: __intrinsic_asm "findLSB";
case hlsl: __intrinsic_asm "firstbitlow";
+ case metal: __intrinsic_asm "ctz";
case spirv: return spirv_asm {
OpExtInst $$vector<int,N> result glsl450 FindILsb $value
};
@@ -7971,7 +7998,7 @@ vector<int,N> firstbitlow(vector<int,N> value)
}
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint firstbitlow(uint value)
{
__target_switch
@@ -7980,6 +8007,7 @@ uint firstbitlow(uint value)
case cuda: __intrinsic_asm "$P_firstbitlow($0)";
case glsl: __intrinsic_asm "findLSB";
case hlsl: __intrinsic_asm "firstbitlow";
+ case metal: __intrinsic_asm "ctz";
case spirv: return spirv_asm {
OpExtInst $$uint result glsl450 FindILsb $value
};
@@ -7988,13 +8016,14 @@ uint firstbitlow(uint value)
__generic<let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<uint,N> firstbitlow(vector<uint,N> value)
{
__target_switch
{
case glsl: __intrinsic_asm "findLSB";
case hlsl: __intrinsic_asm "firstbitlow";
+ case metal: __intrinsic_asm "ctz";
case spirv: return spirv_asm {
OpExtInst $$vector<uint,N> result glsl450 FindILsb $value
};
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index 53679be35..d003e4f41 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -149,9 +149,11 @@ alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv;
alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv;
alias cpp_hlsl = cpp | hlsl;
alias cuda_glsl_hlsl = cuda | glsl | hlsl;
+alias cuda_hlsl_metal_spirv = cuda | hlsl | metal | spirv;
alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv;
alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv;
alias cuda_glsl_spirv = cuda | glsl | spirv;
+alias cuda_glsl_metal_spirv = cuda | glsl | metal | spirv;
alias cuda_hlsl = cuda | hlsl;
alias cuda_hlsl_spirv = cuda | hlsl | spirv;
alias glsl_hlsl_spirv = glsl | hlsl | spirv;
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index adca798e6..72b2a08b0 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -714,7 +714,7 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
m_writer->emit("(");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(", ");
- emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
+ emitOperand(inst->getOperand(inst->getOperandCount() - 1), getInfo(EmitOp::General));
m_writer->emit(")");
maybeCloseParens(needClose);
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 794a31e41..07d5b9f6c 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -143,24 +143,36 @@ void MetalSourceEmitter::emitFuncParamLayoutImpl(IRInst* param)
auto layout = as<IRVarLayout>(layoutDecoration->getLayout());
if (!layout)
return;
+
for (auto rr : layout->getOffsetAttrs())
{
switch (rr->getResourceKind())
{
case LayoutResourceKind::MetalTexture:
- m_writer->emit(" [[texture(");
- m_writer->emit(rr->getOffset());
- m_writer->emit(")]]");
+ if (as<IRTextureTypeBase>(param->getDataType()) || as<IRTextureBufferType>(param->getDataType()))
+ {
+ m_writer->emit(" [[texture(");
+ m_writer->emit(rr->getOffset());
+ m_writer->emit(")]]");
+ }
break;
case LayoutResourceKind::MetalBuffer:
- m_writer->emit(" [[buffer(");
- m_writer->emit(rr->getOffset());
- m_writer->emit(")]]");
+ if (as<IRPtrTypeBase>(param->getDataType()) || as<IRHLSLStructuredBufferTypeBase>(param->getDataType()) ||
+ as<IRByteAddressBufferTypeBase>(param->getDataType()) ||
+ as<IRUniformParameterGroupType>(param->getDataType()))
+ {
+ m_writer->emit(" [[buffer(");
+ m_writer->emit(rr->getOffset());
+ m_writer->emit(")]]");
+ }
break;
case LayoutResourceKind::SamplerState:
- m_writer->emit(" [[sampler(");
- m_writer->emit(rr->getOffset());
- m_writer->emit(")]]");
+ if (as<IRSamplerStateTypeBase>(param->getDataType()))
+ {
+ m_writer->emit(" [[sampler(");
+ m_writer->emit(rr->getOffset());
+ m_writer->emit(")]]");
+ }
break;
case LayoutResourceKind::VaryingInput:
m_writer->emit(" [[stage_in]]");
@@ -314,6 +326,15 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO
}
break;
}
+ case kIROp_FRem:
+ {
+ m_writer->emit("fmod(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit(", ");
+ emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ m_writer->emit(")");
+ return true;
+ }
case kIROp_Select:
{
m_writer->emit("select(");
@@ -374,7 +395,7 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO
m_writer->emit("[(");
emitOperand(offset, getInfo(EmitOp::General));
m_writer->emit(")>>2] = as_type<uint32_t>(");
- emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
+ emitOperand(inst->getOperand(inst->getOperandCount() - 1), getInfo(EmitOp::General));
m_writer->emit(")");
return true;
}
@@ -587,9 +608,9 @@ void MetalSourceEmitter::emitSimpleTypeImpl(IRType* type)
m_writer->emit("matrix<");
emitType(matType->getElementType());
m_writer->emit(",");
- emitVal(matType->getColumnCount(), getInfo(EmitOp::General));
- m_writer->emit(",");
emitVal(matType->getRowCount(), getInfo(EmitOp::General));
+ m_writer->emit(",");
+ emitVal(matType->getColumnCount(), getInfo(EmitOp::General));
m_writer->emit("> ");
return;
}
@@ -760,7 +781,7 @@ bool MetalSourceEmitter::maybeEmitSystemSemantic(IRInst* inst)
return false;
}
-void MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex)
+bool MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex)
{
if (!semanticName.startsWithCaseInsensitive(toSlice("SV_")))
{
@@ -772,7 +793,9 @@ void MetalSourceEmitter::_emitUserSemantic(UnownedStringSlice semanticName, IRIn
m_writer->emit(semanticIndex);
}
m_writer->emit(")]]");
+ return true;
}
+ return false;
}
void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets)
@@ -785,8 +808,10 @@ void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets)
if (maybeEmitSystemSemantic(inst))
return;
- bool hasSemanticFromLayout = false;
- if (auto varLayout = findVarLayout(inst))
+ auto varLayout = findVarLayout(inst);
+ bool hasSemantic = false;
+
+ if (varLayout)
{
for (auto attr : varLayout->getAllAttrs())
{
@@ -797,18 +822,21 @@ void MetalSourceEmitter::emitSemanticsImpl(IRInst* inst, bool allowOffsets)
m_writer->emit(" [[attribute(");
m_writer->emit(offsetAttr->getOffset());
m_writer->emit(")]]");
+ return;
}
}
- else if (auto semanticAttr = as<IRSemanticAttr>(attr))
+ }
+ for (auto attr : varLayout->getAllAttrs())
+ {
+ if (auto semanticAttr = as<IRSemanticAttr>(attr))
{
auto semanticName = String(semanticAttr->getName()).toUpper();
- _emitUserSemantic(semanticAttr->getName(), semanticAttr->getIndex());
- hasSemanticFromLayout = true;
+ hasSemantic = _emitUserSemantic(semanticAttr->getName(), semanticAttr->getIndex());
}
}
}
- if (!hasSemanticFromLayout)
+ if (!hasSemantic)
{
if (auto semanticDecor = inst->findDecoration<IRSemanticDecoration>())
{
diff --git a/source/slang/slang-emit-metal.h b/source/slang/slang-emit-metal.h
index 8b014d604..32557bf27 100644
--- a/source/slang/slang-emit-metal.h
+++ b/source/slang/slang-emit-metal.h
@@ -76,7 +76,7 @@ protected:
void _emitHLSLDecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val);
void _emitStageAccessSemantic(IRStageAccessDecoration* decoration, const char* name);
- void _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex);
+ bool _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex);
bool maybeEmitSystemSemantic(IRInst* inst);
};
diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp
index 38bee566c..dba3ab5f5 100644
--- a/source/slang/slang-ir-byte-address-legalize.cpp
+++ b/source/slang/slang-ir-byte-address-legalize.cpp
@@ -1168,10 +1168,9 @@ struct ByteAddressBufferLegalizationContext
uint64Val, m_builder.getIntValue(m_builder.getUInt64Type(), 32)));
auto loOffset = offset;
auto hiOffset = emitOffsetAddIfNeeded(offset, 4);
- IRInst* storeLoArgs[] = { buffer, loOffset, loVal };
- IRInst* storeHiArgs[] = { buffer, hiOffset, hiVal };
- m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeLoArgs);
- m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeHiArgs);
+ IRInst* alignment = m_builder.getIntValue(m_builder.getUIntType(), 0);
+ m_builder.emitByteAddressBufferStore(buffer, loOffset, alignment, loVal);
+ m_builder.emitByteAddressBufferStore(buffer, hiOffset, alignment, hiVal);
return SLANG_OK;
}
else if (sizeAlignment.size < 4)
@@ -1202,14 +1201,12 @@ struct ByteAddressBufferLegalizationContext
mask = m_builder.emitBitNot(m_builder.getUIntType(), mask);
auto maskedData = m_builder.emitBitAnd(m_builder.getUIntType(), existingVal, mask);
auto newData = m_builder.emitBitOr(m_builder.getUIntType(), maskedData, shiftedData);
- IRInst* storeArgs[] = { buffer, alignedOffset, newData };
- m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs);
+ m_builder.emitByteAddressBufferStore(buffer, alignedOffset, newData);
return SLANG_OK;
}
}
{
- IRInst* storeArgs[] = { buffer, offset, value };
- m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs);
+ m_builder.emitByteAddressBufferStore(buffer, offset, value);
return SLANG_OK;
}
}
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index bc86fa7ee..4781ea2c3 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -3926,6 +3926,9 @@ public:
IRInst* emitOutImplicitCast(IRInst* type, IRInst* value);
IRInst* emitInOutImplicitCast(IRInst* type, IRInst* value);
+ IRInst* emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* value);
+ IRInst* emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* alignment, IRInst* value);
+
IRFunc* createFunc();
IRGlobalVar* createGlobalVar(
IRType* valueType);
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index b6d000d20..22ef4e6be 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -3213,6 +3213,18 @@ namespace Slang
return inst;
}
+ IRInst* IRBuilder::emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* value)
+ {
+ IRInst* args[] = { byteAddressBuffer, offset, getIntValue(getUIntType(), 0), value};
+ return emitIntrinsicInst(getVoidType(), kIROp_ByteAddressBufferStore, 4, args);
+ }
+
+ IRInst* IRBuilder::emitByteAddressBufferStore(IRInst* byteAddressBuffer, IRInst* offset, IRInst* alignment, IRInst* value)
+ {
+ IRInst* args[] = { byteAddressBuffer, offset, alignment, value };
+ return emitIntrinsicInst(getVoidType(), kIROp_ByteAddressBufferStore, 4, args);
+ }
+
IRInst* IRBuilder::emitReinterpret(IRInst* type, IRInst* value)
{
return emitIntrinsicInst((IRType*)type, kIROp_Reinterpret, 1, &value);