Add arguments for controlling floating point denormal mode (#7461)

* Implement -fp-denorm-mode slangc arg * Split fp-denorm-mode into 3 args for fp16/32/64 * Remove redundant option categories * Use emitInst for multiple of the same OpExecutionMode * Fix formatting * Remove -denorm any * Re-add option categories * emitinst for ftz * Use enums for type text * Remove extra categories again * Add tests for denorm mode * Move denorm mode to post linking * format code (#8) Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> * regenerate command line reference (#9) Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> * Clean up tests * Fix option text * format code (#10) Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> * Add tests for "any" mode * Return "any" enum if option not set * Simplify emission logic * Add support for generic entrypoints * Move denorm modes to end of CompilerOptionName enum * format code (#11) Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> * Move new enum members to before CountOf * Add not checks to tests, fix generic test, add functionality tests * Rename denorm to fpDenormal * Clean up functional test * Rename denorm test dir * Fix formatting, regenerate cmdline ref * Fold simple tests into functional tests, add more dxil checks * Remove no-op DX tests, make tests more consistent * Disable VK functionality tests that will fail on the CI configs * Fix formatting * Add comments to disabled tests explaining why --------- Co-authored-by: slangbot <ellieh+slangbot@nvidia.com> Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
author: aidanfnv <aidanf@nvidia.com> 2025-07-01 00:41:52 -0700
committer: GitHub <noreply@github.com> 2025-07-01 07:41:52 +0000
commit: d50c3f34a2eda5bf5e278c78d32cc9923fd83b82 (patch)
tree: 47a1499f4e7375470d1776311e5344b7d20f841b
parent: 6231a6830880f650e444405b670ed7cc0987184b (diff)
19 files changed, 604 insertions, 0 deletions
diff --git a/docs/command-line-slangc-reference.md b/docs/command-line-slangc-reference.md
index 30a0a5e6a..3a0b9acd1 100644
--- a/docs/command-line-slangc-reference.md
+++ b/docs/command-line-slangc-reference.md
@@ -27,6 +27,7 @@ slangc -help-style markdown -h
 * [line-directive-mode](#line-directive-mode)
 * [debug-info-format](#debug-info-format)
 * [fp-mode](#fp-mode)
+* [fp-denormal-mode](#fp-denormal-mode)
 * [help-style](#help-style)
 * [optimization-level](#optimization-level)
 * [debug-level](#debug-level)
@@ -396,6 +397,30 @@ Disables generics and specialization pass.
 Control floating point optimizations 
 
 
+<a id="denorm-mode-fp16"></a>
+### -denorm-mode-fp16
+
+**-denorm-mode-fp16 &lt;[fp-denormal-mode](#fp-denormal-mode)&gt;**
+
+Control handling of 16-bit denormal floating point values in SPIR-V (any, preserve, ftz) 
+
+
+<a id="denorm-mode-fp32"></a>
+### -denorm-mode-fp32
+
+**-denorm-mode-fp32 &lt;[fp-denormal-mode](#fp-denormal-mode)&gt;**
+
+Control handling of 32-bit denormal floating point values in SPIR-V and DXIL (any, preserve, ftz) 
+
+
+<a id="denorm-mode-fp64"></a>
+### -denorm-mode-fp64
+
+**-denorm-mode-fp64 &lt;[fp-denormal-mode](#fp-denormal-mode)&gt;**
+
+Control handling of 64-bit denormal floating point values in SPIR-V (any, preserve, ftz) 
+
+
 <a id="g"></a>
 ### -g
 
@@ -962,6 +987,15 @@ Floating Point Mode
 * `fast` : Allow optimizations that may change results of floating-point computations. Prefer the fastest version of special functions supported by the target. 
 * `default` : Default floating point mode 
 
+<a id="fp-denormal-mode"></a>
+## fp-denormal-mode
+
+Floating Point Denormal Handling Mode 
+
+* `any` : Use any denormal handling mode (default). The mode used is implementation defined. 
+* `preserve` : Preserve denormal values 
+* `ftz` : Flush denormals to zero 
+
 <a id="help-style"></a>
 ## help-style
 
diff --git a/include/slang.h b/include/slang.h
index 0ee9496ef..782c4a082 100644
--- a/include/slang.h
+++ b/include/slang.h
@@ -737,6 +737,17 @@ typedef uint32_t SlangSizeT;
     };
 
     /*!
+    @brief Options to control floating-point denormal handling mode for a target.
+    */
+    typedef unsigned int SlangFpDenormalModeIntegral;
+    enum SlangFpDenormalMode : SlangFpDenormalModeIntegral
+    {
+        SLANG_FP_DENORM_MODE_ANY = 0,  // Any handling mode (default); implementation defined
+        SLANG_FP_DENORM_MODE_PRESERVE, // Preserve denormal values
+        SLANG_FP_DENORM_MODE_FTZ,      // Flush denormals to zero
+    };
+
+    /*!
     @brief Options to control emission of `#line` directives
     */
     typedef unsigned int SlangLineDirectiveModeIntegral;
@@ -1026,6 +1037,12 @@ typedef uint32_t SlangSizeT;
         DumpModule,
 
         EmitSeparateDebug, // bool
+
+        // Floating point denormal handling modes
+        DenormalModeFp16,
+        DenormalModeFp32,
+        DenormalModeFp64,
+
         CountOf,
     };
 
diff --git a/source/compiler-core/slang-downstream-compiler.h b/source/compiler-core/slang-downstream-compiler.h
index c23a6eff0..6ffcf7aea 100644
--- a/source/compiler-core/slang-downstream-compiler.h
+++ b/source/compiler-core/slang-downstream-compiler.h
@@ -197,6 +197,13 @@ struct DownstreamCompileOptions
         Precise,
     };
 
+    enum class FloatingPointDenormalMode : uint8_t
+    {
+        Any,
+        Preserve,
+        FlushToZero,
+    };
+
     enum PipelineType : uint8_t
     {
         Unknown,
@@ -277,6 +284,11 @@ struct DownstreamCompileOptions
 
     // The debug info format to use.
     SlangDebugInfoFormat m_debugInfoFormat = SLANG_DEBUG_INFO_FORMAT_DEFAULT;
+
+    // The floating point denormal handling mode to use for each floating point precision
+    FloatingPointDenormalMode denormalModeFp16 = FloatingPointDenormalMode::Any;
+    FloatingPointDenormalMode denormalModeFp32 = FloatingPointDenormalMode::Any;
+    FloatingPointDenormalMode denormalModeFp64 = FloatingPointDenormalMode::Any;
 };
 static_assert(std::is_trivially_copyable_v<DownstreamCompileOptions>);
 
@@ -482,6 +494,7 @@ struct DownstreamCompilerUtilBase
     typedef CompileOptions::DebugInfoType DebugInfoType;
 
     typedef CompileOptions::FloatingPointMode FloatingPointMode;
+    typedef CompileOptions::FloatingPointDenormalMode FloatingPointDenormalMode;
 
     typedef DownstreamProductFlag ProductFlag;
     typedef DownstreamProductFlags ProductFlags;
diff --git a/source/compiler-core/slang-dxc-compiler.cpp b/source/compiler-core/slang-dxc-compiler.cpp
index 0d4bc0a59..e27a0fc37 100644
--- a/source/compiler-core/slang-dxc-compiler.cpp
+++ b/source/compiler-core/slang-dxc-compiler.cpp
@@ -512,6 +512,22 @@ SlangResult DXCDownstreamCompiler::compile(const CompileOptions& inOptions, IArt
         break;
     }
 
+    switch (options.denormalModeFp32)
+    {
+    default:
+    case CompileOptions::FloatingPointDenormalMode::Any:
+        break;
+
+    case CompileOptions::FloatingPointDenormalMode::Preserve:
+        args.add(L"-denorm");
+        args.add(L"preserve");
+        break;
+
+    case CompileOptions::FloatingPointDenormalMode::FlushToZero:
+        args.add(L"-denorm");
+        args.add(L"ftz");
+        break;
+    }
 
     switch (options.optimizationLevel)
     {
diff --git a/source/core/slang-type-text-util.cpp b/source/core/slang-type-text-util.cpp
index 9f55b69e2..39b68db45 100644
--- a/source/core/slang-type-text-util.cpp
+++ b/source/core/slang-type-text-util.cpp
@@ -171,6 +171,14 @@ static const NamesDescriptionValue s_floatingPointModes[] = {
      "by the target."},
     {SLANG_FLOATING_POINT_MODE_DEFAULT, "default", "Default floating point mode"}};
 
+static const NamesDescriptionValue s_fpDenormalModes[] = {
+    {SLANG_FP_DENORM_MODE_ANY,
+     "any",
+     "Use any denormal handling mode (default). The mode used is implementation defined."},
+    {SLANG_FP_DENORM_MODE_PRESERVE, "preserve", "Preserve denormal values"},
+    {SLANG_FP_DENORM_MODE_FTZ, "ftz", "Flush denormals to zero"},
+};
+
 static const NamesDescriptionValue s_optimizationLevels[] = {
     {SLANG_OPTIMIZATION_LEVEL_NONE, "0,none", "Disable all optimizations"},
     {SLANG_OPTIMIZATION_LEVEL_DEFAULT,
@@ -253,6 +261,11 @@ static const NamesDescriptionValue s_fileSystemTypes[] = {
     return makeConstArrayView(s_floatingPointModes);
 }
 
+/* static */ ConstArrayView<NamesDescriptionValue> TypeTextUtil::getFpDenormalModeInfos()
+{
+    return makeConstArrayView(s_fpDenormalModes);
+}
+
 /* static */ ConstArrayView<NamesDescriptionValue> TypeTextUtil::getOptimizationLevelInfos()
 {
     return makeConstArrayView(s_optimizationLevels);
diff --git a/source/core/slang-type-text-util.h b/source/core/slang-type-text-util.h
index eddbcec5e..684d109c3 100644
--- a/source/core/slang-type-text-util.h
+++ b/source/core/slang-type-text-util.h
@@ -45,6 +45,8 @@ struct TypeTextUtil
     static ConstArrayView<NamesDescriptionValue> getDebugLevelInfos();
     /// Get the floating point modes
     static ConstArrayView<NamesDescriptionValue> getFloatingPointModeInfos();
+    /// Get the floating point denormal handling modes
+    static ConstArrayView<NamesDescriptionValue> getFpDenormalModeInfos();
     // Get the line directive infos
     static ConstArrayView<NamesDescriptionValue> getLineDirectiveInfos();
     /// Get the optimization level info
diff --git a/source/slang/slang-compiler-options.h b/source/slang/slang-compiler-options.h
index 7205e1696..5986c4e82 100644
--- a/source/slang/slang-compiler-options.h
+++ b/source/slang/slang-compiler-options.h
@@ -14,6 +14,7 @@ using slang::CompilerOptionValueKind;
 enum MatrixLayoutMode : SlangMatrixLayoutModeIntegral;
 enum class LineDirectiveMode : SlangLineDirectiveModeIntegral;
 enum class FloatingPointMode : SlangFloatingPointModeIntegral;
+enum class FloatingPointDenormalMode : SlangFpDenormalModeIntegral;
 enum class OptimizationLevel : SlangOptimizationLevelIntegral;
 enum class DebugInfoLevel : SlangDebugInfoLevelIntegral;
 enum class CodeGenTarget : SlangCompileTargetIntegral;
@@ -375,6 +376,33 @@ struct CompilerOptionSet
         return getEnumOption<FloatingPointMode>(CompilerOptionName::FloatingPointMode);
     }
 
+    /// Returns the fp16 denormal handling mode, or `Any` when the option is not set.
+    FloatingPointDenormalMode getDenormalModeFp16()
+    {
+        const auto name = CompilerOptionName::DenormalModeFp16;
+        return hasOption(name)
+                   ? getEnumOption<FloatingPointDenormalMode>(name)
+                   : static_cast<FloatingPointDenormalMode>(SLANG_FP_DENORM_MODE_ANY);
+    }
+
+    /// Returns the fp32 denormal handling mode, or `Any` when the option is not set.
+    FloatingPointDenormalMode getDenormalModeFp32()
+    {
+        const auto name = CompilerOptionName::DenormalModeFp32;
+        return hasOption(name)
+                   ? getEnumOption<FloatingPointDenormalMode>(name)
+                   : static_cast<FloatingPointDenormalMode>(SLANG_FP_DENORM_MODE_ANY);
+    }
+
+    /// Returns the fp64 denormal handling mode, or `Any` when the option is not set.
+    FloatingPointDenormalMode getDenormalModeFp64()
+    {
+        const auto name = CompilerOptionName::DenormalModeFp64;
+        return hasOption(name)
+                   ? getEnumOption<FloatingPointDenormalMode>(name)
+                   : static_cast<FloatingPointDenormalMode>(SLANG_FP_DENORM_MODE_ANY);
+    }
+
     LineDirectiveMode getLineDirectiveMode()
     {
         return getEnumOption<LineDirectiveMode>(CompilerOptionName::LineDirectiveMode);
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index dc202c3b0..e31918d58 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -1740,6 +1740,69 @@ SlangResult CodeGenContext::emitWithDownstreamForEntryPoints(ComPtr<IArtifact>&
             SLANG_ASSERT(!"Unhandled floating point mode");
         }
 
+        if (getTargetProgram()->getOptionSet().hasOption(CompilerOptionName::DenormalModeFp16))
+        {
+            switch (getTargetProgram()->getOptionSet().getEnumOption<FloatingPointDenormalMode>(
+                CompilerOptionName::DenormalModeFp16))
+            {
+            case FloatingPointDenormalMode::Any:
+                options.denormalModeFp16 = DownstreamCompileOptions::FloatingPointDenormalMode::Any;
+                break;
+            case FloatingPointDenormalMode::Preserve:
+                options.denormalModeFp16 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::Preserve;
+                break;
+            case FloatingPointDenormalMode::FlushToZero:
+                options.denormalModeFp16 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::FlushToZero;
+                break;
+            default:
+                SLANG_ASSERT(!"Unhandled fp16 denormal handling mode");
+            }
+        }
+
+        if (getTargetProgram()->getOptionSet().hasOption(CompilerOptionName::DenormalModeFp32))
+        {
+            switch (getTargetProgram()->getOptionSet().getEnumOption<FloatingPointDenormalMode>(
+                CompilerOptionName::DenormalModeFp32))
+            {
+            case FloatingPointDenormalMode::Any:
+                options.denormalModeFp32 = DownstreamCompileOptions::FloatingPointDenormalMode::Any;
+                break;
+            case FloatingPointDenormalMode::Preserve:
+                options.denormalModeFp32 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::Preserve;
+                break;
+            case FloatingPointDenormalMode::FlushToZero:
+                options.denormalModeFp32 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::FlushToZero;
+                break;
+            default:
+                SLANG_ASSERT(!"Unhandled fp32 denormal handling mode");
+            }
+        }
+
+        if (getTargetProgram()->getOptionSet().hasOption(CompilerOptionName::DenormalModeFp64))
+        {
+            switch (getTargetProgram()->getOptionSet().getEnumOption<FloatingPointDenormalMode>(
+                CompilerOptionName::DenormalModeFp64))
+            {
+            case FloatingPointDenormalMode::Any:
+                options.denormalModeFp64 = DownstreamCompileOptions::FloatingPointDenormalMode::Any;
+                break;
+            case FloatingPointDenormalMode::Preserve:
+                options.denormalModeFp64 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::Preserve;
+                break;
+            case FloatingPointDenormalMode::FlushToZero:
+                options.denormalModeFp64 =
+                    DownstreamCompileOptions::FloatingPointDenormalMode::FlushToZero;
+                break;
+            default:
+                SLANG_ASSERT(!"Unhandled fp64 denormal handling mode");
+            }
+        }
+
         {
             // We need to look at the stage of the entry point(s) we are
             // being asked to compile, since this will determine the
diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h
index 7071e4c73..57c20aed2 100644
--- a/source/slang/slang-compiler.h
+++ b/source/slang/slang-compiler.h
@@ -2037,6 +2037,13 @@ enum class FloatingPointMode : SlangFloatingPointModeIntegral
     Precise = SLANG_FLOATING_POINT_MODE_PRECISE,
 };
 
+enum class FloatingPointDenormalMode : SlangFpDenormalModeIntegral
+{
+    Any = SLANG_FP_DENORM_MODE_ANY,
+    Preserve = SLANG_FP_DENORM_MODE_PRESERVE,
+    FlushToZero = SLANG_FP_DENORM_MODE_FTZ,
+};
+
 enum class WriterChannel : SlangWriterChannelIntegral
 {
     Diagnostic = SLANG_WRITER_CHANNEL_DIAGNOSTIC,
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index dc5352041..52e8e3d65 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -5121,6 +5121,42 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
                 }
             }
             break;
+        case kIROp_FpDenormalPreserveDecoration:
+            {
+                auto fpDenormalDecor = cast<IRFpDenormalPreserveDecoration>(decoration);
+                auto width = int32_t(getIntVal(fpDenormalDecor->getWidth()));
+                ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_float_controls"));
+                requireSPIRVCapability(SpvCapabilityDenormPreserve);
+                // emitInst is used instead of requireSPIRVExecutionMode because
+                // we need to be able to emit the same execution mode with different
+                // operands for different widths
+                emitInst(
+                    getSection(SpvLogicalSectionID::ExecutionModes),
+                    decoration,
+                    SpvOpExecutionMode,
+                    dstID,
+                    SpvExecutionModeDenormPreserve,
+                    SpvLiteralInteger::from32(width));
+            }
+            break;
+        case kIROp_FpDenormalFlushToZeroDecoration:
+            {
+                auto fpDenormalDecor = cast<IRFpDenormalFlushToZeroDecoration>(decoration);
+                auto width = int32_t(getIntVal(fpDenormalDecor->getWidth()));
+                ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_float_controls"));
+                requireSPIRVCapability(SpvCapabilityDenormFlushToZero);
+                // emitInst is used instead of requireSPIRVExecutionMode because
+                // we need to be able to emit the same execution mode with different
+                // operands for different widths
+                emitInst(
+                    getSection(SpvLogicalSectionID::ExecutionModes),
+                    decoration,
+                    SpvOpExecutionMode,
+                    dstID,
+                    SpvExecutionModeDenormFlushToZero,
+                    SpvLiteralInteger::from32(width));
+            }
+            break;
         case kIROp_MaxVertexCountDecoration:
             // Don't do anything here, instead wait until we see OutputTopologyDecoration
             // and emit them together to ensure MaxVertexCount always appears before
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index f4d535466..f92eedaa4 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -617,6 +617,85 @@ static void unexportNonEmbeddableIR(CodeGenTarget target, IRModule* irModule)
     }
 }
 
+// Add DenormPreserve and DenormFlushToZero decorations to all entry point functions.
+// Widths whose mode is `Any` get no decoration.
+static void addDenormalModeDecorations(IRModule* irModule, CodeGenContext* codeGenContext)
+{
+    // `auto&&` avoids copying the option set when getOptionSet() returns a reference
+    auto&& optionSet = codeGenContext->getTargetProgram()->getOptionSet();
+
+    // Only add decorations if we have floating point denormal handling mode options set
+    auto denormalModeFp16 = optionSet.getDenormalModeFp16();
+    auto denormalModeFp32 = optionSet.getDenormalModeFp32();
+    auto denormalModeFp64 = optionSet.getDenormalModeFp64();
+
+    if (denormalModeFp16 == FloatingPointDenormalMode::Any &&
+        denormalModeFp32 == FloatingPointDenormalMode::Any &&
+        denormalModeFp64 == FloatingPointDenormalMode::Any)
+        return;
+
+    IRBuilder builder(irModule);
+
+    // The width operands do not depend on the entry point, so create them once up front
+    auto width16 = builder.getIntValue(builder.getUIntType(), 16);
+    auto width32 = builder.getIntValue(builder.getUIntType(), 32);
+    auto width64 = builder.getIntValue(builder.getUIntType(), 64);
+
+    // Apply floating point denormal handling mode decorations to all entry point functions
+    for (auto inst : irModule->getGlobalInsts())
+    {
+        IRFunc* func = nullptr;
+
+        // Check if this is a direct function
+        if (auto directFunc = as<IRFunc>(inst))
+        {
+            func = directFunc;
+        }
+        // Check if this is a generic that contains an entry point function
+        else if (auto generic = as<IRGeneric>(inst))
+        {
+            if (auto innerFunc = as<IRFunc>(findGenericReturnVal(generic)))
+            {
+                func = innerFunc;
+            }
+        }
+
+        // Skip anything that is not an entry point function
+        if (!func || !func->findDecoration<IREntryPointDecoration>())
+            continue;
+
+        // Handle FP16 denormal handling mode
+        if (denormalModeFp16 == FloatingPointDenormalMode::Preserve)
+        {
+            builder.addFpDenormalPreserveDecoration(func, width16);
+        }
+        else if (denormalModeFp16 == FloatingPointDenormalMode::FlushToZero)
+        {
+            builder.addFpDenormalFlushToZeroDecoration(func, width16);
+        }
+
+        // Handle FP32 denormal handling mode
+        if (denormalModeFp32 == FloatingPointDenormalMode::Preserve)
+        {
+            builder.addFpDenormalPreserveDecoration(func, width32);
+        }
+        else if (denormalModeFp32 == FloatingPointDenormalMode::FlushToZero)
+        {
+            builder.addFpDenormalFlushToZeroDecoration(func, width32);
+        }
+
+        // Handle FP64 denormal handling mode
+        if (denormalModeFp64 == FloatingPointDenormalMode::Preserve)
+        {
+            builder.addFpDenormalPreserveDecoration(func, width64);
+        }
+        else if (denormalModeFp64 == FloatingPointDenormalMode::FlushToZero)
+        {
+            builder.addFpDenormalFlushToZeroDecoration(func, width64);
+        }
+    }
+}
+
 // Helper function to convert a 20 byte SHA1 to a hexadecimal string,
 // needed for the build identifier instruction.
 String getBuildIdentifierString(ComponentType* component)
@@ -755,6 +834,15 @@ Result linkAndOptimizeIR(
 
     checkEntryPointDecorations(irModule, target, sink);
 
+    // Add floating point denormal handling mode decorations to entry point functions based on
+    // compiler options. This is done post-linking to ensure all entry points from linked modules
+    // are processed.
+    addDenormalModeDecorations(irModule, codeGenContext);
+#if 0
+    dumpIRIfEnabled(codeGenContext, irModule, "FP DENORMAL MODE DECORATIONS ADDED");
+#endif
+    validateIRModuleIfEnabled(codeGenContext, irModule);
+
     // Another transformation that needed to wait until we
     // had layout information on parameters is to take uniform
     // parameters of a shader entry point and move them into
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index e13a623bc..d3db24d20 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -879,6 +879,8 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace)
     INST(MaxVertexCountDecoration,          maxVertexCount,         1, 0)
     INST(InstanceDecoration,                instance,               1, 0)
     INST(NumThreadsDecoration,              numThreads,             3, 0)
+    INST(FpDenormalPreserveDecoration,      fpDenormalPreserve,     1, 0)
+    INST(FpDenormalFlushToZeroDecoration,   fpDenormalFlushToZero,  1, 0)
     INST(WaveSizeDecoration,                waveSize,               1, 0)
 
     INST(AvailableInDownstreamIRDecoration, availableInDownstreamIR, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 14480083d..bb57c082c 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -643,6 +643,28 @@ struct IRNumThreadsDecoration : IRDecoration
     IRGlobalParam* getZSpecConst() { return as<IRGlobalParam>(getOperand(2)); }
 };
 
+struct IRFpDenormalPreserveDecoration : IRDecoration
+{
+    enum
+    {
+        kOp = kIROp_FpDenormalPreserveDecoration
+    };
+    IR_LEAF_ISA(FpDenormalPreserveDecoration)
+    // Operand 0: the floating point bit width (16, 32 or 64) whose denormals are preserved.
+    IRIntLit* getWidth() { return cast<IRIntLit>(getOperand(0)); }
+};
+
+struct IRFpDenormalFlushToZeroDecoration : IRDecoration
+{
+    enum
+    {
+        kOp = kIROp_FpDenormalFlushToZeroDecoration
+    };
+    IR_LEAF_ISA(FpDenormalFlushToZeroDecoration)
+    // Operand 0: the floating point bit width (16, 32 or 64) whose denormals flush to zero.
+    IRIntLit* getWidth() { return cast<IRIntLit>(getOperand(0)); }
+};
+
 struct IRWaveSizeDecoration : IRDecoration
 {
     enum
@@ -4138,6 +4160,8 @@ public:
     IRInst* addFloatingModeOverrideDecoration(IRInst* dest, FloatingPointMode mode);
 
     IRInst* addNumThreadsDecoration(IRInst* inst, IRInst* x, IRInst* y, IRInst* z);
+    IRInst* addFpDenormalPreserveDecoration(IRInst* inst, IRInst* width);
+    IRInst* addFpDenormalFlushToZeroDecoration(IRInst* inst, IRInst* width);
     IRInst* addWaveSizeDecoration(IRInst* inst, IRInst* numLanes);
 
     IRInst* emitSpecializeInst(
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index f42cfc7f2..6e7e573b8 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5633,6 +5633,20 @@ IRInst* IRBuilder::addNumThreadsDecoration(IRInst* inst, IRInst* x, IRInst* y, I
     return addDecoration(inst, kIROp_NumThreadsDecoration, operands, 3);
 }
 
+// Attach an `fpDenormalPreserve` decoration to `inst`, with `width` as its single operand.
+IRInst* IRBuilder::addFpDenormalPreserveDecoration(IRInst* inst, IRInst* width)
+{
+    IRInst* args[] = {width};
+    return addDecoration(inst, kIROp_FpDenormalPreserveDecoration, args, 1);
+}
+
+// Attach an `fpDenormalFlushToZero` decoration to `inst`, with `width` as its single operand.
+IRInst* IRBuilder::addFpDenormalFlushToZeroDecoration(IRInst* inst, IRInst* width)
+{
+    IRInst* args[] = {width};
+    return addDecoration(inst, kIROp_FpDenormalFlushToZeroDecoration, args, 1);
+}
+
 IRInst* IRBuilder::addWaveSizeDecoration(IRInst* inst, IRInst* numLanes)
 {
     IRInst* operands[1] = {numLanes};
diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp
index 3227e2de4..9141188df 100644
--- a/source/slang/slang-options.cpp
+++ b/source/slang/slang-options.cpp
@@ -53,6 +53,7 @@ enum class ValueCategory
     Target,
     Language,
     FloatingPointMode,
+    FloatingPointDenormalMode,
     ArchiveType,
     Stage,
     LineDirectiveMode,
@@ -85,6 +86,7 @@ SLANG_GET_VALUE_CATEGORY(Compiler, SlangPassThrough)
 SLANG_GET_VALUE_CATEGORY(ArchiveType, SlangArchiveType)
 SLANG_GET_VALUE_CATEGORY(LineDirectiveMode, SlangLineDirectiveMode)
 SLANG_GET_VALUE_CATEGORY(FloatingPointMode, FloatingPointMode)
+SLANG_GET_VALUE_CATEGORY(FloatingPointDenormalMode, FloatingPointDenormalMode)
 SLANG_GET_VALUE_CATEGORY(FileSystemType, TypeTextUtil::FileSystemType)
 SLANG_GET_VALUE_CATEGORY(HelpStyle, CommandOptionsWriter::Style)
 SLANG_GET_VALUE_CATEGORY(OptimizationLevel, SlangOptimizationLevel)
@@ -186,6 +188,13 @@ void initCommandOptions(CommandOptions& options)
 
         options.addCategory(
             CategoryKind::Value,
+            "fp-denormal-mode",
+            "Floating Point Denormal Handling Mode",
+            UserValue(ValueCategory::FloatingPointDenormalMode));
+        options.addValues(TypeTextUtil::getFpDenormalModeInfos());
+
+        options.addCategory(
+            CategoryKind::Value,
             "help-style",
             "Help Style",
             UserValue(ValueCategory::HelpStyle));
@@ -580,6 +589,21 @@ void initCommandOptions(CommandOptions& options)
          "-fp-mode,-floating-point-mode",
          "-fp-mode <fp-mode>, -floating-point-mode <fp-mode>",
          "Control floating point optimizations"},
+        {OptionKind::DenormalModeFp16,
+         "-denorm-mode-fp16",
+         "-denorm-mode-fp16 <fp-denormal-mode>",
+         "Control handling of 16-bit denormal floating point values in SPIR-V (any, preserve, "
+         "ftz)"},
+        {OptionKind::DenormalModeFp32,
+         "-denorm-mode-fp32",
+         "-denorm-mode-fp32 <fp-denormal-mode>",
+         "Control handling of 32-bit denormal floating point values in SPIR-V and DXIL (any, "
+         "preserve, ftz)"},
+        {OptionKind::DenormalModeFp64,
+         "-denorm-mode-fp64",
+         "-denorm-mode-fp64 <fp-denormal-mode>",
+         "Control handling of 64-bit denormal floating point values in SPIR-V (any, preserve, "
+         "ftz)"},
         {OptionKind::DebugInformation,
          "-g...",
          "-g, -g<debug-info-format>, -g<debug-level>",
@@ -2802,6 +2826,27 @@ SlangResult OptionsParser::_parse(int argc, char const* const* argv)
                 setFloatingPointMode(getCurrentTarget(), value);
                 break;
             }
+        case OptionKind::DenormalModeFp16:
+            {
+                FloatingPointDenormalMode value;
+                SLANG_RETURN_ON_FAIL(_expectValue(value));
+                linkage->m_optionSet.set(CompilerOptionName::DenormalModeFp16, value);
+                break;
+            }
+        case OptionKind::DenormalModeFp32:
+            {
+                FloatingPointDenormalMode value;
+                SLANG_RETURN_ON_FAIL(_expectValue(value));
+                linkage->m_optionSet.set(CompilerOptionName::DenormalModeFp32, value);
+                break;
+            }
+        case OptionKind::DenormalModeFp64:
+            {
+                FloatingPointDenormalMode value;
+                SLANG_RETURN_ON_FAIL(_expectValue(value));
+                linkage->m_optionSet.set(CompilerOptionName::DenormalModeFp64, value);
+                break;
+            }
         case OptionKind::Optimization:
             {
                 UnownedStringSlice levelSlice = argValue.getUnownedSlice().tail(2);
diff --git a/tests/fp-denormal-mode/denorm-mode-fp16.slang b/tests/fp-denormal-mode/denorm-mode-fp16.slang
new file mode 100644
index 000000000..f770ccbbe
--- /dev/null
+++ b/tests/fp-denormal-mode/denorm-mode-fp16.slang
@@ -0,0 +1,54 @@
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp16 any
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp16 preserve
+//TEST:SIMPLE(filecheck=CHECK_FTZ):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp16 ftz
+
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp16 any
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp16 preserve
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp16 ftz
+
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=PRESERVE):-vk -compute -Xslang -denorm-mode-fp16 -Xslang preserve
+// Capability shaderDenormFlushToZeroFloat16 is VK_FALSE on the Vulkan device used for CI testing, resulting in
+// runtime error VUID-RuntimeSpirv-shaderDenormFlushToZeroFloat16-06299 during CI testing
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=FTZ):-vk -compute -Xslang -denorm-mode-fp16 -Xslang ftz
+
+// CHECK_ANY-NOT: DenormPreserve
+// CHECK_ANY-NOT: DenormFlushToZero
+
+// CHECK_PRESERVE: OpExecutionMode %computeMain DenormPreserve 16
+// CHECK_PRESERVE-NOT: OpExecutionMode %computeMain DenormPreserve 32
+// CHECK_PRESERVE-NOT: OpExecutionMode %computeMain DenormPreserve 64
+// CHECK_PRESERVE-NOT: DenormFlushToZero
+
+// CHECK_FTZ: OpExecutionMode %computeMain DenormFlushToZero 16
+// CHECK_FTZ-NOT: OpExecutionMode %computeMain DenormFlushToZero 32
+// CHECK_FTZ-NOT: OpExecutionMode %computeMain DenormFlushToZero 64
+// CHECK_FTZ-NOT: DenormPreserve
+
+// CHECK_DXIL-NOT: fp32-denorm-mode
+// CHECK_DXIL-NOT: preserve
+// CHECK_DXIL-NOT: ftz
+
+// In preserve mode, denormalized numbers should be preserved
+// PRESERVE: 66
+
+// In flush-to-zero mode, denormalized numbers should be flushed to zero
+// FTZ: 0
+
+// Smallest normal fp16
+//TEST_INPUT: set inputBuffer = ubuffer(data=[0x0400], stride=2)
+RWStructuredBuffer<half> inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0], stride=2):out,name=outputBuffer
+RWStructuredBuffer<half> outputBuffer;
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    half smallestNormal = inputBuffer[0];
+    half denormal = smallestNormal / 10;
+    
+    outputBuffer[0] = denormal;
+}
diff --git a/tests/fp-denormal-mode/denorm-mode-fp32.slang b/tests/fp-denormal-mode/denorm-mode-fp32.slang
new file mode 100644
index 000000000..530d99b6d
--- /dev/null
+++ b/tests/fp-denormal-mode/denorm-mode-fp32.slang
@@ -0,0 +1,64 @@
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp32 any
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE_SPIRV):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp32 preserve
+//TEST:SIMPLE(filecheck=CHECK_FTZ_SPIRV):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp32 ftz
+
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp32 any
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp32 preserve
+//TEST:SIMPLE(filecheck=CHECK_FTZ_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp32 ftz
+
+// Capability shaderDenormPreserveFloat32 is VK_FALSE on the Vulkan device used for CI testing, resulting in
+// runtime error VUID-RuntimeSpirv-shaderDenormPreserveFloat32-06297 during CI testing
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=PRESERVE):-vk -compute -Xslang -denorm-mode-fp32 -Xslang preserve
+// Capability shaderDenormFlushToZeroFloat32 is VK_FALSE on the Vulkan device used for CI testing, resulting in
+// runtime error VUID-RuntimeSpirv-shaderDenormFlushToZeroFloat32-06300 during CI testing
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=FTZ):-vk -compute -Xslang -denorm-mode-fp32 -Xslang ftz
+
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=PRESERVE):-slang -compute -dx12 -use-dxil -profile cs_6_2 -shaderobj -Xslang -denorm-mode-fp32 -Xslang preserve
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=FTZ):-slang -compute -dx12 -use-dxil -profile cs_6_2 -Xslang -denorm-mode-fp32 -Xslang ftz
+
+// CHECK_ANY-NOT: DenormPreserve
+// CHECK_ANY-NOT: DenormFlushToZero
+
+// CHECK_PRESERVE_SPIRV: OpExecutionMode %computeMain DenormPreserve 32
+// CHECK_PRESERVE_SPIRV-NOT: OpExecutionMode %computeMain DenormPreserve 16
+// CHECK_PRESERVE_SPIRV-NOT: OpExecutionMode %computeMain DenormPreserve 64
+// CHECK_PRESERVE_SPIRV-NOT: DenormFlushToZero
+
+// CHECK_FTZ_SPIRV: OpExecutionMode %computeMain DenormFlushToZero 32
+// CHECK_FTZ_SPIRV-NOT: OpExecutionMode %computeMain DenormFlushToZero 16
+// CHECK_FTZ_SPIRV-NOT: OpExecutionMode %computeMain DenormFlushToZero 64
+// CHECK_FTZ_SPIRV-NOT: DenormPreserve
+
+// CHECK_ANY-NOT: preserve
+// CHECK_ANY-NOT: ftz
+
+// CHECK_PRESERVE_DXIL: attributes #0 = { "fp32-denorm-mode"="preserve" }
+// CHECK_PRESERVE_DXIL-NOT: ftz
+
+// CHECK_FTZ_DXIL: attributes #0 = { "fp32-denorm-mode"="ftz" }
+// CHECK_FTZ_DXIL-NOT: preserve
+
+// In preserve mode, denormalized numbers should be preserved
+// PRESERVE: CCCCD
+
+// In flush-to-zero mode, denormalized numbers should be flushed to zero
+// FTZ: 0
+
+// Smallest normal fp32
+//TEST_INPUT: set inputBuffer = ubuffer(data=[0x00800000], stride=4)
+RWStructuredBuffer<float> inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    // Scale the smallest normal float down by 10 so the result is denormal;
+    // whether it survives or becomes zero is what the buffer checks observe.
+    float scaled = inputBuffer[0] / 10;
+    outputBuffer[0] = scaled;
+}
diff --git a/tests/fp-denormal-mode/denorm-mode-fp64.slang b/tests/fp-denormal-mode/denorm-mode-fp64.slang
new file mode 100644
index 000000000..0f4ac6c3c
--- /dev/null
+++ b/tests/fp-denormal-mode/denorm-mode-fp64.slang
@@ -0,0 +1,58 @@
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp64 any
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp64 preserve
+//TEST:SIMPLE(filecheck=CHECK_FTZ):-target spirv-assembly -entry computeMain -stage compute -denorm-mode-fp64 ftz
+
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp64 any
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp64 preserve
+//TEST:SIMPLE(filecheck=CHECK_DXIL):-target dxil-assembly -entry computeMain -stage compute -profile cs_6_2 -denorm-mode-fp64 ftz
+
+// Capability shaderDenormPreserveFloat64 is VK_FALSE on the Vulkan device used for CI testing, resulting in
+// runtime error VUID-RuntimeSpirv-shaderDenormPreserveFloat64-06298 during CI testing
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=PRESERVE):-vk -compute -Xslang -denorm-mode-fp64 -Xslang preserve
+// Capability shaderDenormFlushToZeroFloat64 is VK_FALSE on the Vulkan device used for CI testing, resulting in
+// runtime error VUID-RuntimeSpirv-shaderDenormFlushToZeroFloat64-06301 during CI testing
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=FTZ):-vk -compute -Xslang -denorm-mode-fp64 -Xslang ftz
+
+// CHECK_ANY-NOT: DenormPreserve
+// CHECK_ANY-NOT: DenormFlushToZero
+
+// CHECK_PRESERVE: OpExecutionMode %computeMain DenormPreserve 64
+// CHECK_PRESERVE-NOT: OpExecutionMode %computeMain DenormPreserve 16
+// CHECK_PRESERVE-NOT: OpExecutionMode %computeMain DenormPreserve 32
+// CHECK_PRESERVE-NOT: DenormFlushToZero
+
+// CHECK_FTZ: OpExecutionMode %computeMain DenormFlushToZero 64
+// CHECK_FTZ-NOT: OpExecutionMode %computeMain DenormFlushToZero 16
+// CHECK_FTZ-NOT: OpExecutionMode %computeMain DenormFlushToZero 32
+// CHECK_FTZ-NOT: DenormPreserve
+
+// CHECK_DXIL-NOT: fp32-denorm-mode
+// CHECK_DXIL-NOT: preserve
+// CHECK_DXIL-NOT: ftz
+
+// In preserve mode, denormalized numbers should be preserved
+// PRESERVE: 9999999A
+// PRESERVE: 19999
+
+// In flush-to-zero mode, denormalized numbers should be flushed to zero
+// FTZ: 0
+// FTZ: 0
+
+// Smallest normal fp64 (high 32-bit word first, then low word)
+//TEST_INPUT: set inputBuffer = ubuffer(data=[0x00100000 0x00000000], stride=8)
+RWStructuredBuffer<uint32_t> inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0], stride=8):out,name=outputBuffer
+RWStructuredBuffer<double> outputBuffer;
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    uint32_t highBits = inputBuffer[0]; // the input data lists the high word first
+    uint32_t lowBits = inputBuffer[1];
+    double quotient = asdouble(lowBits, highBits) / 10; // asdouble takes (low, high)
+    outputBuffer[0] = quotient;
+}
diff --git a/tests/fp-denormal-mode/denorm-mode-generic.slang b/tests/fp-denormal-mode/denorm-mode-generic.slang
new file mode 100644
index 000000000..e53478d41
--- /dev/null
+++ b/tests/fp-denormal-mode/denorm-mode-generic.slang
@@ -0,0 +1,26 @@
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry genericEntry<half> -stage compute
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry genericEntry<float> -stage compute
+//TEST:SIMPLE(filecheck=CHECK_ANY):-target spirv-assembly -entry genericEntry<double> -stage compute
+
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE):-target spirv-assembly -entry genericEntry<half> -stage compute -denorm-mode-fp32 preserve
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE):-target spirv-assembly -entry genericEntry<float> -stage compute -denorm-mode-fp32 preserve
+//TEST:SIMPLE(filecheck=CHECK_PRESERVE):-target spirv-assembly -entry genericEntry<double> -stage compute -denorm-mode-fp32 preserve
+
+//TEST:SIMPLE(filecheck=CHECK_FTZ):-target spirv-assembly -entry genericEntry<half> -stage compute -denorm-mode-fp32 ftz
+//TEST:SIMPLE(filecheck=CHECK_FTZ):-target spirv-assembly -entry genericEntry<float> -stage compute -denorm-mode-fp32 ftz
+//TEST:SIMPLE(filecheck=CHECK_FTZ):-target spirv-assembly -entry genericEntry<double> -stage compute -denorm-mode-fp32 ftz
+
+// CHECK_ANY-NOT: DenormPreserve
+// CHECK_ANY-NOT: DenormFlushToZero
+
+// CHECK_PRESERVE: DenormPreserve 32
+// CHECK_FTZ: DenormFlushToZero 32
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+__generic<T : __BuiltinArithmeticType> void genericEntry()
+{
+    T one = T(1); // operands are constructed in the specialized type T
+    T two = T(2);
+    T sum = one + two; // never read; only the emitted execution modes are checked
+}
author	aidanfnv <aidanf@nvidia.com>	2025-07-01 00:41:52 -0700
committer	GitHub <noreply@github.com>	2025-07-01 07:41:52 +0000
commit	d50c3f34a2eda5bf5e278c78d32cc9923fd83b82 (patch)
tree	47a1499f4e7375470d1776311e5344b7d20f841b
parent	6231a6830880f650e444405b670ed7cc0987184b (diff)