3 files changed, 121 insertions, 0 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 86e77c650..73bdee96e 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -8884,6 +8884,12 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, sm_4_0_version)]
 T dot(vector<T, N> x, vector<T, N> y)
 {
+    // Handle 1-vector case explicitly since many backends don't support it natively
+    if (N == 1)
+    {
+        return x[0] * y[0];
+    }
+    
     __target_switch
     {
     case glsl: __intrinsic_asm "dot";
@@ -8906,6 +8912,12 @@ __generic<T : __BuiltinIntegerType, let N : int>
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, sm_4_0_version)]
 T dot(vector<T, N> x, vector<T, N> y)
 {
+    // Handle 1-vector case explicitly since many backends don't support it natively
+    if (N == 1)
+    {
+        return x[0] * y[0];
+    }
+    
     __target_switch
     {
     case hlsl: __intrinsic_asm "dot";
diff --git a/tests/compute/dot1-generic.slang b/tests/compute/dot1-generic.slang
new file mode 100644
index 000000000..aed2c9a39
--- /dev/null
+++ b/tests/compute/dot1-generic.slang
@@ -0,0 +1,77 @@
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-mtl -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgsl -output-using-type
+
+// Test for dot product with 1-element vectors called from a generic function
+
+// CHECK: 20
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+// Thin generic wrapper: forwards two N-component float vectors to the built-in dot()
+float genericDotFloat<let N : int>(vector<float, N> a, vector<float, N> b)
+{
+    float product = dot(a, b);
+    return product;
+}
+
+// Thin generic wrapper: forwards two N-component int vectors to the built-in dot()
+int genericDotInt<let N : int>(vector<int, N> a, vector<int, N> b)
+{
+    int product = dot(a, b);
+    return product;
+}
+
+// Builds two N-component float vectors filled with `value` and returns their dot product
+float testFloatDot<let N : int>(float value)
+{
+    vector<float, N> lhs;
+    vector<float, N> rhs;
+
+    // Write the same scalar into every component of both operands
+    for (int lane = 0; lane < N; ++lane)
+    {
+        lhs[lane] = value;
+        rhs[lane] = value;
+    }
+
+    // Go through the generic wrapper rather than calling dot() directly
+    return genericDotFloat(lhs, rhs);
+}
+
+// Builds two N-component int vectors filled with `value` and returns their dot product
+int testIntDot<let N : int>(int value)
+{
+    vector<int, N> lhs;
+    vector<int, N> rhs;
+
+    // Write the same scalar into every component of both operands
+    for (int lane = 0; lane < N; ++lane)
+    {
+        lhs[lane] = value;
+        rhs[lane] = value;
+    }
+
+    // Go through the generic wrapper rather than calling dot() directly
+    return genericDotInt(lhs, rhs);
+}
+
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    // Primary case: single-component vectors routed through the generic helpers
+    float scalarFloatDot = testFloatDot<1>(3.0); // 3.0 * 3.0 = 9.0
+    int scalarIntDot = testIntDot<1>(3);         // 3 * 3 = 9
+
+    // Control case: N=2 exercises the same generic path with a wider vector
+    float pairFloatDot = testFloatDot<2>(1.0);   // 1.0*1.0 + 1.0*1.0 = 2.0
+
+    // Fold the three results into one integer for the buffer check: 9 + 9 + 2 = 20
+    int total = int(scalarFloatDot);
+    total += scalarIntDot + int(pairFloatDot);
+    outputBuffer[0] = total;
+}
diff --git a/tests/compute/dot1.slang b/tests/compute/dot1.slang
new file mode 100644
index 000000000..d6022318d
--- /dev/null
+++ b/tests/compute/dot1.slang
@@ -0,0 +1,32 @@
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-mtl -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda -output-using-type
+//TEST:COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgsl -output-using-type
+
+// Test for dot product with 1-element vectors (float and int)
+
+// CHECK: 8
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    // Float case: dot() on two explicitly typed 1-component vectors
+    vector<float, 1> floatLhs = vector<float, 1>(2.0);
+    vector<float, 1> floatRhs = vector<float, 1>(2.0);
+    float floatProduct = dot(floatLhs, floatRhs); // 2.0 * 2.0 = 4.0
+
+    // Int case: same shape, integer element type
+    vector<int, 1> intLhs = vector<int, 1>(2);
+    vector<int, 1> intRhs = vector<int, 1>(2);
+    int intProduct = dot(intLhs, intRhs); // 2 * 2 = 4
+
+    // Combine both results into one integer for the buffer check: 4 + 4 = 8
+    int total = int(floatProduct);
+    total += intProduct;
+    outputBuffer[0] = total;
+}