20 files changed, 479 insertions, 485 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj
index 2b982f56e..54f406549 100644
--- a/build/visual-studio/slang/slang.vcxproj
+++ b/build/visual-studio/slang/slang.vcxproj
@@ -256,6 +256,7 @@
     <ClInclude Include="..\..\..\source\slang\slang-ir-restructure.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-sccp.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-arrays.h" />
+    <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-buffer-load-arg.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-dispatch.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-dynamic-associatedtype-lookup.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-function-call.h" />
@@ -380,6 +381,7 @@
     <ClCompile Include="..\..\..\source\slang\slang-ir-restructure.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-sccp.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-arrays.cpp" />
+    <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-buffer-load-arg.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-dispatch.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-dynamic-associatedtype-lookup.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-function-call.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters
index 8580ead0a..25e46742f 100644
--- a/build/visual-studio/slang/slang.vcxproj.filters
+++ b/build/visual-studio/slang/slang.vcxproj.filters
@@ -219,6 +219,9 @@
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-arrays.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-buffer-load-arg.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\source\slang\slang-ir-specialize-dispatch.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -587,6 +590,9 @@
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-arrays.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-buffer-load-arg.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\source\slang\slang-ir-specialize-dispatch.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 12dd80135..4a0018770 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -25,6 +25,7 @@
 #include "slang-ir-restructure-scoping.h"
 #include "slang-ir-specialize.h"
 #include "slang-ir-specialize-arrays.h"
+#include "slang-ir-specialize-buffer-load-arg.h"
 #include "slang-ir-specialize-resources.h"
 #include "slang-ir-ssa.h"
 #include "slang-ir-strip-witness-tables.h"
@@ -451,6 +452,7 @@ Result linkAndOptimizeIR(
     // pass down the target request along with the IR.
     //
     specializeResourceOutputs(compileRequest, targetRequest, irModule);
+    specializeFuncsForBufferLoadArgs(compileRequest, targetRequest, irModule);
     specializeResourceParameters(compileRequest, targetRequest, irModule);
 
     // For GLSL targets, we also want to specialize calls to functions that
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 904b9955e..267866b1b 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -1310,6 +1310,8 @@ struct IRLoad : IRInst
 {
     IRUse ptr;
     IR_LEAF_ISA(Load)
+
+    IRInst* getPtr() { return ptr.get(); }
 };
 
 struct IRStore : IRInst
@@ -1317,6 +1319,9 @@ struct IRStore : IRInst
     IRUse ptr;
     IRUse val;
     IR_LEAF_ISA(Store)
+
+    IRInst* getPtr() { return ptr.get(); }
+    IRInst* getVal() { return val.get(); }
 };
 
 struct IRFieldExtract : IRInst
diff --git a/source/slang/slang-ir-specialize-arrays.cpp b/source/slang/slang-ir-specialize-arrays.cpp
index 53e317b81..2ed3da479 100644
--- a/source/slang/slang-ir-specialize-arrays.cpp
+++ b/source/slang/slang-ir-specialize-arrays.cpp
@@ -35,8 +35,9 @@ struct ArrayParameterSpecializationCondition : FunctionCallSpecializeCondition
         return false;
     }
 
-    bool doesParamNeedSpecialization(IRParam* param)
+    bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
     {
+        SLANG_UNUSED(arg);
         return isStructTypeWithArray(param->getDataType());
     }
 };
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
new file mode 100644
index 000000000..353c6a104
--- /dev/null
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
@@ -0,0 +1,96 @@
+// slang-ir-specialize-buffer-load-arg.cpp
+#include "slang-ir-specialize-buffer-load-arg.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-specialize-function-call.h"
+
+namespace Slang
+{
+
+// This file implements a pass that translates function call sites where
+// the result of a buffer load from a global shader parameter (e.g., a
+// global constant buffer) is being passed through to the callee. It
+// replaces those with calls to specialized callee functions that directly
+// reference the chosen global.
+//
+// As with most of our IR passes, we encapsulate the logic here in a context
+// type so that the data that needs to be shared throughout the pass can
+// be conveniently scoped.
+
+struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition
+{
+    typedef FunctionCallSpecializeCondition Super;
+
+    virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+    {
+        // We only want to specialize for `struct` types and not base types.
+        //
+        // TODO: We might want to consider some criteria here for the "large-ness"
+        // of a structure (in terms of bytes and/or fields), so that we don't
+        // eliminate loads of sufficiently small types (which are cheap to pass
+        // by value).
+        //
+        auto paramType = param->getDataType();
+        if(!as<IRStructType>(paramType))
+            return false;
+
+        // We also only want to specialize for arguments that are a load
+        // from some kind of global shader parameter.
+        //
+        IRInst* a = arg;
+        if (auto argLoad = as<IRLoad>(arg))
+        {
+            a = argLoad->getPtr();
+        }
+        else
+        {
+            return false;
+        }
+
+        // We want to handle loads from a shader parameter that is an array
+        // of buffers, and not just a single global buffer.
+        //
+        while (auto argGetElement = as<IRGetElement>(a))
+        {
+            a = argGetElement->getBase();
+        }
+
+        // The "root" of the parameter must be a reference to a global-scope
+        // shader parameter, so that we know we can substitute it into the callee.
+        //
+        if (auto argGlobalParam = as<IRGlobalParam>(a))
+        {
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+
+        // TODO: There are other patterns that we could attempt to optimize here.
+        // For example, this logic only handles loads of the *entire* contents of
+        // a buffer, so it would miss:
+        //
+        // * A load of a large structure from a field in a constant buffer, so that
+        //   the value loaded is not the entire buffer contents.
+        //
+        // * A load of a large structure from a structured buffer, or any other kind
+        //   of buffer that requires an index.
+        //
+        // * Any resource load that is not expressed at the IR level with a `load`
+        //   instruction (e.g., those that might use an intrinsic function).
+        //
+    }
+};
+
+void specializeFuncsForBufferLoadArgs(
+    BackEndCompileRequest*  compileRequest,
+    TargetRequest*          targetRequest,
+    IRModule*               module)
+{
+    FuncBufferLoadSpecializationCondition condition;
+    specializeFunctionCalls(compileRequest, targetRequest, module, &condition);
+}
+
+}
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.h b/source/slang/slang-ir-specialize-buffer-load-arg.h
new file mode 100644
index 000000000..9d79a870e
--- /dev/null
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.h
@@ -0,0 +1,44 @@
+// slang-ir-specialize-buffer-load-arg.h
+#pragma once
+
+namespace Slang
+{
+class BackEndCompileRequest;
+class TargetRequest;
+struct IRModule;
+
+
+    /// Specialize functions in `module` that are called with direct loads from buffers.
+    ///
+    /// For example:
+    ///
+    ///     struct Params { /* many fields */ }
+    ///     int helper(Params p, int x) { return p.justOneField + x; }
+    ///     ...
+    ///     ConstantBuffer<Params> gParams;
+    ///     ...
+    ///     int z = helper(gParams, y);
+    ///
+    /// In this case, the function `helper` declares a very large structure type as
+    /// a by-value argument. Depending on the final code-generation target, this could
+    /// result in output code that loads the entire contents of `gParams` before passing
+    /// it to `helper`, which then uses only a single field (rendering the rest of the load
+    /// operations wasted).
+    ///
+    /// This pass is designed to specialize a callee function like `helper` based on call
+    /// sites in this form, so that the output code is:
+    ///
+    ///     struct Params { /* as before */ }
+    ///     ConstantBuffer<Params> gParams;
+    ///     int helper_1(int x) { return gParams.justOneField + x; }
+    ///     ...
+    ///     int z = helper_1(y);
+    ///
+    /// Note how in the transformed code, there is no longer any attempt to load the rest
+    /// of the contents of `gParams`.
+    ///
+void specializeFuncsForBufferLoadArgs(
+    BackEndCompileRequest* compileRequest,
+    TargetRequest* targetRequest,
+    IRModule* module);
+}
diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp
index eb574c002..0341438c5 100644
--- a/source/slang/slang-ir-specialize-function-call.cpp
+++ b/source/slang/slang-ir-specialize-function-call.cpp
@@ -8,6 +8,61 @@
 namespace Slang
 {
 
+bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization(IRParam* param, IRInst* inArg)
+{
+    SLANG_UNUSED(param);
+
+    // Determining if an argument is suitable for
+    // specializing a callee function requires
+    // looking at its (recursive) structure.
+    //
+    // Rather than write a recursive procedure
+    // here, we will be tail-recursive by using
+    // a simple loop.
+    //
+    IRInst* arg = inArg;
+    for (;;)
+    {
+        // The leaf case we care about is when the
+        // argument at the call site is a global
+        // shader parameter, because then we can
+        // specialize a callee to refer to the same
+        // global parameter directly.
+        //
+        if (as<IRGlobalParam>(arg)) return true;
+
+        // As we will see later, we can also
+        // specialize a call when the argument
+        // is the result of indexing into an
+        // array (`base[index]`) *if* the `base`
+        // of the indexing operation is also
+        // suitable for specialization.
+        //
+        if (arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load)
+        {
+            auto base = arg->getOperand(0);
+
+            // We will "recurse" on the base of
+            // the indexing operation by continuing
+            // our loop with the `base` as our new
+            // argument.
+            //
+            arg = base;
+            continue;
+        }
+
+        // By default, we will *not* consider an argument
+        // suitable for specialization.
+        //
+        // TODO: There may be other cases that are worth
+        // handling here. The current code is based on
+        // observation of what simple shaders do in
+        // practice.
+        //
+        return false;
+    }
+}
+
 struct FunctionParameterSpecializationContext
 {
     // This type implements a pass to specialize functions
@@ -121,14 +176,15 @@ struct FunctionParameterSpecializationContext
         // two conditions we care about:
         //
         // 1. Should we specialize? This amounts to whether
-        // `func` has any parameters that need specialization.
-        // We will call those "specializable" parameters for
-        // lack of a better name.
+        // `func` has any parameters that "want" specialization,
+        // or whether `call` has any arguments that "want" specialization.
+        // If either the parameter or argument at a given position
+        // wants specialization, we will call the corresponding parameter
+        // a "specializable" parameter for lack of a better name.
         //
         // 2. Can we specialize? This amounts to whether the
-        // arguments in `call` that correspond to those
-        // specializable parameters are "suitable" for use
-        // in specialization.
+        // parameter of `func` and the corresponding argument to
+        // `call` are both "suitable" for specialization.
         //
         // We are going to answer both of these queries in
         // a single loop that walks over the parameters of
@@ -147,23 +203,23 @@ struct FunctionParameterSpecializationContext
             SLANG_ASSERT(argIndex < call->getArgCount());
             auto arg = call->getArg(argIndex);
 
-            // If the given parameter doesn't need specialization,
+            // If neither the parameter nor the argument wants specialization,
             // then we need to keep looking.
             //
-            if(!doesParamNeedSpecialization(param))
+            if(!doesParamWantSpecialization(param, arg))
                 continue;
 
-            // If we have run into a `param` that needs specialization,
+            // If we have run into a `param` or `arg` that wants specialization,
             // then our first condition is met.
             //
             anySpecializableParam = true;
 
-            // Now we need to check whether `arg` is actually suitable
+            // Now we need to check whether `param` and `arg` are actually suitable
             // for specialization (our second condition). If not, we
             // can bail out immediately because our second condition
             // cannot be met.
             //
-            if(!isArgSuitableForSpecialization(arg))
+            if(!isParamSuitableForSpecialization(param, arg))
                 return false;
         }
 
@@ -178,62 +234,14 @@ struct FunctionParameterSpecializationContext
     // Of course, now we need to back-fill the predicates that
     // the above function used to evaluate prameters and arguments.
 
-    bool doesParamNeedSpecialization(IRParam* param)
+    bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
     {
-        return condition->doesParamNeedSpecialization(param);
+        return condition->doesParamWantSpecialization(param, arg);
     }
 
-    bool isArgSuitableForSpecialization(IRInst* inArg)
+    bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg)
     {
-        // Determining if an argument is suitable for
-        // specializing a callee function requires
-        // looking at its (recurisve) structure.
-        //
-        // Rather than write a recursively procedure
-        // here, we will be tail-recursive by using
-        // a simple loop.
-        //
-        IRInst* arg = inArg;
-        for(;;)
-        {
-            // The leaf case we care about is when the
-            // argument at the call site is a global
-            // shader parameter, because then we can
-            // specialize a callee to refer to the same
-            // global parameter directly.
-            //
-            if(as<IRGlobalParam>(arg)) return true;
-
-            // As we will see later, we can also
-            // specialize a call when the argument
-            // is the result of indexing into an
-            // array (`base[index]`) *if* the `base`
-            // of the indexing operation is also
-            // suitable for specialization.
-            //
-            if( arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load )
-            {
-                auto base = arg->getOperand(0);
-
-                // We will "recurse" on the base of
-                // the indexing operation by continuing
-                // our loop with the `base` as our new
-                // argument.
-                //
-                arg = base;
-                continue;
-            }
-
-            // By default, we will *not* consider an argument
-            // suitable for specialization.
-            //
-            // TODO: There may be other cases that are worth
-            // handling here. The current code is based on
-            // observation of what simple shaders do in
-            // practice.
-            //
-            return false;
-        }
+        return condition->isParamSuitableForSpecialization(param, arg);
     }
 
     // Once we'e determined that a given call site can/should
@@ -451,10 +459,10 @@ struct FunctionParameterSpecializationContext
         IRParam*                oldParam,
         IRInst*                 oldArg)
     {
-        // We know that the case where a parameter
-        // doesn't need specialization is easy.
+        // We know that the case where the parameter
+        // and argument don't want specialization is easy.
         //
-        if( !doesParamNeedSpecialization(oldParam) )
+        if( !doesParamWantSpecialization(oldParam, oldArg) )
         {
             // The new call site will use the same argument
             // value as the old one, and we don't need
@@ -470,6 +478,12 @@ struct FunctionParameterSpecializationContext
             // is handled with a different function
             // because it needs to recurse in some cases.
             //
+            // We will add the parameter that we are specializing to
+            // the key for caching of specializations, because functions
+            // specialized at different parameter positions should not
+            // be shared.
+            //
+            ioInfo.key.vals.add(oldParam);
             getCallInfoForArg(ioInfo, oldArg);
         }
     }
@@ -572,7 +586,7 @@ struct FunctionParameterSpecializationContext
         // As always, the easy case is when the parameter of
         // the original function doesn't need specialization.
         //
-        if( !doesParamNeedSpecialization(oldParam) )
+        if( !doesParamWantSpecialization(oldParam, oldArg) )
         {
             // The specialized callee will need a new parameter
             // that fills the same role as the old one, so we
@@ -677,9 +691,19 @@ struct FunctionParameterSpecializationContext
 
             return newVal;
         }
-        else if (oldArg->getOp() == kIROp_Load)
+        else if (auto oldArgLoad = as<IRLoad>(oldArg))
         {
-            return getSpecializedValueForArg(ioInfo, oldArg->getOperand(0));
+            auto oldPtr = oldArgLoad->getPtr();
+            auto newPtr = getSpecializedValueForArg(ioInfo, oldPtr);
+
+            auto builder = getBuilder();
+            builder->setInsertInto(nullptr);
+            auto newVal = builder->emitLoad(
+                oldArg->getFullType(),
+                newPtr);
+            ioInfo.newBodyInsts.add(newVal);
+
+            return newVal;
         }
         else
         {
diff --git a/source/slang/slang-ir-specialize-function-call.h b/source/slang/slang-ir-specialize-function-call.h
index 90c463374..868f9def2 100644
--- a/source/slang/slang-ir-specialize-function-call.h
+++ b/source/slang/slang-ir-specialize-function-call.h
@@ -5,13 +5,16 @@ namespace Slang
 {
     class BackEndCompileRequest;
     class TargetRequest;
+    struct IRInst;
     struct IRModule;
     struct IRParam;
 
     class FunctionCallSpecializeCondition
     {
     public:
-        virtual bool doesParamNeedSpecialization(IRParam* param) = 0;
+        virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) = 0;
+
+        virtual bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg);
     };
 
 
diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp
index c7398fe23..00357ca50 100644
--- a/source/slang/slang-ir-specialize-resources.cpp
+++ b/source/slang/slang-ir-specialize-resources.cpp
@@ -18,8 +18,10 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio
 
     TargetRequest* targetRequest = nullptr;
 
-    bool doesParamNeedSpecialization(IRParam* param)
+    bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
     {
+        SLANG_UNUSED(arg);
+
         // Whether or not a parameter needs specialization is really
         // a function of its type:
         //
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index d2d15735c..c7e32072e 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -2014,6 +2014,40 @@ LoweredValInfo createVar(
     return LoweredValInfo::ptr(irAlloc);
 }
 
+// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
+// we can either do it "aggressively" or not (what we'll call the "default" behavior,
+// although it isn't strictly more common).
+//
+// The case that this is mostly there to address is when somebody writes an operation
+// like:
+//
+//      foo[a] = b;
+//
+// In that case, we might as well just use the `set` accessor if there is one, rather
+// than complicate things. However, in more complex cases like:
+//
+//      foo[a].x = b;
+//
+// there is no way to satisfy the semantics of the code the user wrote (in terms of
+// only writing one vector component, and not a full vector) by using the `set`
+// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
+// an address.
+//
+// TODO: realistically IR lowering is too early to be binding to this choice,
+// because different accessors might be supported on different targets.
+//
+enum class TryGetAddressMode
+{
+    Default,
+    Aggressive,
+};
+
+/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
+LoweredValInfo tryGetAddress(
+    IRGenContext* context,
+    LoweredValInfo const& inVal,
+    TryGetAddressMode       mode);
+
     /// Add a single `in` argument value to a list of arguments
 void addInArg(
     IRGenContext*   context,
@@ -2092,59 +2126,49 @@ void addArg(
             // According to our "calling convention" we need to
             // pass a pointer into the callee.
             //
-            // A naive approach would be to just take the address
-            // of `loweredArg` above and pass it in, but that
-            // has two issues:
-            //
-            // 1. The l-value might not be something that has a single
-            //    well-defined "address" (e.g., `foo.xzy`).
-            //
-            // 2. The l-value argument might actually alias some other
-            //    storage that the callee will access (e.g., we are
-            //    passing in a global variable, or two `out` parameters
-            //    are being passed the same location in an array).
-            //
-            // In each of these cases, the safe option is to create
-            // a temporary variable to use for argument-passing,
-            // and then do copy-in/copy-out around the call.
+            // Ideally we would like to just pass the address of
+            // `loweredArg`, and when that is possible we will do so.
+            // It may happen, though, that `loweredArg` is not an
+            // addressable l-value (e.g., it is `foo.xyz`, so that
+            // the bytes of the l-value are not contiguous).
             //
-            // TODO: We should consider ruling out case (2) as undefined
-            // behavior, and specify that whether `inout` and `out` are
-            // handled via copy-in-copy-out or by-reference parameter
-            // passing is an implementation detail. That would allow
-            // us to avoid introducing a copy except where it is required
-            // for the semantics of (1).
-            //
-            // TODO: We should confirm whether such a change will make
-            // it harder to create SSA values for variables that get
-            // used with `out` or `inout` parameters.
-
-            LoweredValInfo tempVar = createVar(context, paramType);
-
-            // If the parameter is `in out` or `inout`, then we need
-            // to ensure that we pass in the original value stored
-            // in the argument, which we accomplish by assigning
-            // from the l-value to our temp.
-            if(paramDirection == kParameterDirection_InOut)
+            LoweredValInfo argPtr = tryGetAddress(context, argVal, TryGetAddressMode::Default);
+            if(argPtr.flavor == LoweredValInfo::Flavor::Ptr)
             {
-                assign(context, tempVar, argVal);
+                addInArg(context, ioArgs, LoweredValInfo::simple(argPtr.val));
             }
+            else
+            {
+                // If the value is not one that could yield a simple l-value
+                // then we need to convert it into a temporary
+                //
+                LoweredValInfo tempVar = createVar(context, paramType);
 
-            // Now we can pass the address of the temporary variable
-            // to the callee as the actual argument for the `in out`
-            SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr);
-            IRInst* tempPtr = getAddress(context, tempVar, loc);
-            addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr));
+                // If the parameter is `in out` or `inout`, then we need
+                // to ensure that we pass in the original value stored
+                // in the argument, which we accomplish by assigning
+                // from the l-value to our temp.
+                //
+                if (paramDirection == kParameterDirection_InOut)
+                {
+                    assign(context, tempVar, argVal);
+                }
 
-            // Finally, after the call we will need
-            // to copy in the other direction: from our
-            // temp back to the original l-value.
-            OutArgumentFixup fixup;
-            fixup.src = tempVar;
-            fixup.dst = argVal;
+                // Now we can pass the address of the temporary variable
+                // to the callee as the actual argument for the `in out`
+                SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr);
+                IRInst* tempPtr = getAddress(context, tempVar, loc);
+                addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr));
 
-            (*ioFixups).add(fixup);
+                // Finally, after the call we will need
+                // to copy in the other direction: from our
+                // temp back to the original l-value.
+                OutArgumentFixup fixup;
+                fixup.src = tempVar;
+                fixup.dst = argVal;
 
+                (*ioFixups).add(fixup);
+            }
         }
         break;
 
@@ -2196,40 +2220,6 @@ void addCallArgsForParam(
 
 //
 
-// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
-// we can either do it "aggressively" or not (what we'll call the "default" behavior,
-// although it isn't strictly more common).
-//
-// The case that this is mostly there to address is when somebody writes an operation
-// like:
-//
-//      foo[a] = b;
-//
-// In that case, we might as well just use the `set` accessor if there is one, rather
-// than complicate things. However, in more complex cases like:
-//
-//      foo[a].x = b;
-//
-// there is no way to satisfy the semantics of the code the user wrote (in terms of
-// only writing one vector component, and not a full vector) by using the `set`
-// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
-// an address.
-//
-// TODO: realistically IR lowering is too early to be binding to this choice,
-// because different accessors might be supported on different targets.
-//
-enum class TryGetAddressMode
-{
-    Default,
-    Aggressive,
-};
-
-/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
-LoweredValInfo tryGetAddress(
-    IRGenContext*           context,
-    LoweredValInfo const&   inVal,
-    TryGetAddressMode       mode);
-
     /// Compute the direction for a parameter based on its declaration
 ParameterDirection getParameterDirection(VarDeclBase* paramDecl)
 {
diff --git a/tests/bugs/gh-841.slang.glsl b/tests/bugs/gh-841.slang.glsl
index ab223724f..8dd37274a 100644
--- a/tests/bugs/gh-841.slang.glsl
+++ b/tests/bugs/gh-841.slang.glsl
@@ -24,8 +24,8 @@ void main()
 
     if(bool(_S4.u_0 & uint(1)))
     {
-        vec4 _S5 = result_1 + 1.0;
-        result_0 = _S5;
+        vec4 result_2 = result_1 + 1.0;
+        result_0 = result_2;
     }
     else
     {
diff --git a/tests/bugs/vk-image-atomics.slang.glsl b/tests/bugs/vk-image-atomics.slang.glsl
index 16dffd8dc..2ed19bffa 100644
--- a/tests/bugs/vk-image-atomics.slang.glsl
+++ b/tests/bugs/vk-image-atomics.slang.glsl
@@ -9,8 +9,8 @@ out vec4 _S1;
 
 void main()
 {
-    uint _S2;
-    _S2 = imageAtomicAdd(t_0, ivec2(uvec2(0)), 1);
-    _S1 = vec4(_S2);
+    uint u_0;
+    u_0 = imageAtomicAdd(t_0, ivec2(uvec2(0)), 1);
+    _S1 = vec4(u_0);
     return;
 }
diff --git a/tests/compute/unbounded-array-of-array-syntax.slang.glsl b/tests/compute/unbounded-array-of-array-syntax.slang.glsl
index d9d0f6262..a275b4599 100644
--- a/tests/compute/unbounded-array-of-array-syntax.slang.glsl
+++ b/tests/compute/unbounded-array-of-array-syntax.slang.glsl
@@ -21,21 +21,21 @@ void main()
 
     int innerIndex_1 = index_0 & 3;
 
-    uint _S3;
-    uint _S4;
-    (_S3) = (g_aoa_0[nonuniformEXT(index_0 >> 2)])._data.length(); (_S4) = 0;
-    uint bufferCount_0 = _S3;
+    uint bufferCount_0;
+    uint bufferStride_0;
+    (bufferCount_0) = (g_aoa_0[nonuniformEXT(index_0 >> 2)])._data.length();
+    (bufferStride_0) = 0;
 
     if(uint(innerIndex_1) >= bufferCount_0)
     {
-        int _S5 = int(bufferCount_0 - uint(1));
-        innerIndex_0 = _S5;
+        int _S3 = int(bufferCount_0 - uint(1));
+        innerIndex_0 = _S3;
     }
     else
     {
         innerIndex_0 = innerIndex_1;
     }
-    uint _S6 = uint(innerIndex_0);
-    ((outputBuffer_0)._data[(uint(index_0))]) = ((g_aoa_0[nonuniformEXT(index_0 >> 2)])._data[(_S6)]);
+    uint _S4 = uint(innerIndex_0);
+    ((outputBuffer_0)._data[(uint(index_0))]) = ((g_aoa_0[nonuniformEXT(index_0 >> 2)])._data[(_S4)]);
     return;
 }
diff --git a/tests/cross-compile/geometry-shader.slang.glsl b/tests/cross-compile/geometry-shader.slang.glsl
index feaf3e1f2..55e1691a9 100644
--- a/tests/cross-compile/geometry-shader.slang.glsl
+++ b/tests/cross-compile/geometry-shader.slang.glsl
@@ -91,8 +91,8 @@ void main()
 
         EmitVertex();
 
-        int _S9 = ii_0 + 1;
-        ii_0 = _S9;
+        int ii_1 = ii_0 + 1;
+        ii_0 = ii_1;
     }
 
     return;
diff --git a/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl b/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
index fb94867a8..597236122 100644
--- a/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
+++ b/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
@@ -85,20 +85,23 @@ void myMiss_0(inout MyRayPayload_0 payload_4)
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 void main()
 {
-    MyRayPayload_0 payload_5;
     MyProceduralHitAttrs_0 committedProceduralAttrs_0;
     MyProceduralHitAttrs_0 committedProceduralAttrs_1;
-    MyRayPayload_0 payload_6;
     MyProceduralHitAttrs_0 committedProceduralAttrs_2;
-    MyRayPayload_0 payload_7;
     MyProceduralHitAttrs_0 committedProceduralAttrs_3;
+
     rayQueryEXT query_0;
+
+    MyRayPayload_0 payload_5;
     MyRayPayload_0 _S2 = { -1 };
+    payload_5 = _S2;
+ 
     RayDesc_0 ray_1 = { C_0._data.origin_0, C_0._data.tMin_0, C_0._data.direction_0, C_0._data.tMax_0 };
     RayQuery_TraceRayInline_0(query_0, myAccelerationStructure_0, C_0._data.rayFlags_0, C_0._data.instanceMask_0, ray_1);
+
     MyProceduralHitAttrs_0 _S3;
-    payload_5 = _S2;
     committedProceduralAttrs_0 = _S3;
+
     for(;;)
     {
         bool _S4 = rayQueryProceedEXT(query_0);
@@ -111,23 +114,21 @@ void main()
         {
         case uint(1):
             {
-                MyProceduralHitAttrs_0 candidateProceduralAttrs_0 = { 0 };
-                float _S6;
-                _S6 = 0.00000000000000000000;
-                MyProceduralHitAttrs_0 _S7;
-                _S7 = candidateProceduralAttrs_0;
-                bool _S8 = myProceduralIntersection_0(_S6, _S7);
-                float tHit_1 = _S6;
-                MyProceduralHitAttrs_0 candidateProceduralAttrs_1 = _S7;
-                if(_S8)
+                MyProceduralHitAttrs_0 candidateProceduralAttrs_0;
+                MyProceduralHitAttrs_0 _S6 = { 0 };
+                candidateProceduralAttrs_0 = _S6;
+
+                float tHit_1;
+                tHit_1 = 0.00000000000000000000;
+
+                bool _S7 = myProceduralIntersection_0(tHit_1, candidateProceduralAttrs_0);
+                if(_S7)
                 {
-                    MyRayPayload_0 _S9;
-                    _S9 = payload_5;
-                    bool _S10 = myProceduralAnyHit_0(_S9);
-                    MyRayPayload_0 _S11 = _S9;
-                    if(_S10)
+                    bool _S8 = myProceduralAnyHit_0(payload_5);
+                    if(_S8)
                     {
                         rayQueryGenerateIntersectionEXT(query_0, tHit_1);
+                        MyProceduralHitAttrs_0 _S9 = candidateProceduralAttrs_0;
                         if(bool(C_0._data.shouldStopAtFirstHit_0))
                         {
                             rayQueryTerminateEXT(query_0);
@@ -135,31 +136,25 @@ void main()
                         else
                         {
                         }
-                        committedProceduralAttrs_1 = candidateProceduralAttrs_1;
+                        committedProceduralAttrs_1 = _S9;
                     }
                     else
                     {
                         committedProceduralAttrs_1 = committedProceduralAttrs_0;
                     }
-                    payload_6 = _S11;
                     committedProceduralAttrs_2 = committedProceduralAttrs_1;
                 }
                 else
                 {
-                    payload_6 = payload_5;
                     committedProceduralAttrs_2 = committedProceduralAttrs_0;
                 }
-                payload_7 = payload_6;
                 committedProceduralAttrs_3 = committedProceduralAttrs_2;
                 break;
             }
         case uint(0):
             {
-                MyRayPayload_0 _S12;
-                _S12 = payload_5;
-                bool _S13 = myTriangleAnyHit_0(_S12);
-                MyRayPayload_0 _S14 = _S12;
-                if(_S13)
+                bool _S10 = myTriangleAnyHit_0(payload_5);
+                if(_S10)
                 {
                     rayQueryConfirmIntersectionEXT(query_0);
                     if(bool(C_0._data.shouldStopAtFirstHit_0))
@@ -173,42 +168,33 @@ void main()
                 else
                 {
                 }
-                payload_7 = _S14;
                 committedProceduralAttrs_3 = committedProceduralAttrs_0;
                 break;
             }
         default:
             {
-                payload_7 = payload_5;
                 committedProceduralAttrs_3 = committedProceduralAttrs_0;
                 break;
             }
         }
-        payload_5 = payload_7;
         committedProceduralAttrs_0 = committedProceduralAttrs_3;
     }
-    uint _S15 = (rayQueryGetIntersectionTypeEXT((query_0), true));
-    switch(_S15)
+    uint _S11 = (rayQueryGetIntersectionTypeEXT((query_0), true));
+    switch(_S11)
     {
     case uint(1):
         {
-            MyRayPayload_0 _S16;
-            _S16 = payload_5;
-            myTriangleClosestHit_0(_S16);
+            myTriangleClosestHit_0(payload_5);
             break;
         }
     case uint(2):
         {
-            MyRayPayload_0 _S17;
-            _S17 = payload_5;
-            myProceduralClosestHit_0(_S17, committedProceduralAttrs_0);
+            myProceduralClosestHit_0(payload_5, committedProceduralAttrs_0);
             break;
         }
     case uint(0):
         {
-            MyRayPayload_0 _S18;
-            _S18 = payload_5;
-            myMiss_0(_S18);
+            myMiss_0(payload_5);
             break;
         }
     default:
diff --git a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
index f5f575287..139d55518 100644
--- a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
+++ b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
@@ -3,17 +3,12 @@
 layout(row_major) uniform;
 layout(row_major) buffer;
 
-#line 10 "tests/slang-extension/atomic-float-byte-address-buffer-cross.slang"
 layout(std430, binding = 1) buffer _S1 {
     float _data[];
 } anotherBuffer_0;
-
-#line 60 "hlsl"
 layout(std430, binding = 0) buffer _S2 {
     float _data[];
 } _S3;
-
-#line 18 "tests/slang-extension/atomic-float-byte-address-buffer-cross.slang"
 void RWByteAddressBuffer_InterlockedAddF32_0(uint _S4, float _S5, out float _S6)
 {
     uint _S7 = _S4 / uint(4);
@@ -29,20 +24,15 @@ void RWByteAddressBuffer_InterlockedAddF32_1(uint _S9, float _S10)
     return;
 }
 
-
-#line 13
-layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;void main()
+layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
+void main()
 {
     uint tid_0 = gl_GlobalInvocationID.x;
     int idx_0 = int(tid_0 & uint(3) ^ tid_0 >> 2);
-
     float delta_0 = ((anotherBuffer_0)._data[(uint(idx_0 & 3))]);
-
-#line 21
-    float _S13;
-    RWByteAddressBuffer_InterlockedAddF32_0(uint(idx_0 << 2), 1.00000000000000000000, _S13);
+    float previousValue_0;
+    previousValue_0 = float(0);
+    RWByteAddressBuffer_InterlockedAddF32_0(uint(idx_0 << 2), 1.00000000000000000000, previousValue_0);
     RWByteAddressBuffer_InterlockedAddF32_1(uint(int(tid_0 >> 2) << 2), delta_0);
-
-#line 13
     return;
-}
-\ No newline at end of file
+}
diff --git a/tests/vkray/callable-caller.slang.glsl b/tests/vkray/callable-caller.slang.glsl
index b0d174381..91b788655 100644
--- a/tests/vkray/callable-caller.slang.glsl
+++ b/tests/vkray/callable-caller.slang.glsl
@@ -1,30 +1,18 @@
+//TEST_IGNORE_FILE:
 #version 460
-
+#extension GL_NV_ray_tracing : require
 layout(row_major) uniform;
 layout(row_major) buffer;
-#extension GL_NV_ray_tracing : require
-
-#define tmp_ubo         _S1
-#define tmp_launchid    _S2
-#define tmp_luanchidf   _S3
-#define tmp_launchsize  _S4
-#define tmp_launchpos   _S5
-#define tmp_shaderidx   _S6
-#define tmp_payload     _S7
-#define tmp_launchid2   _S8
-
 struct SLANG_ParameterGroup_C_0
 {
     uint shaderIndex_0;
 };
 
 layout(binding = 0)
-layout(std140)
-uniform tmp_ubo
+layout(std140) uniform _S1
 {
     SLANG_ParameterGroup_C_0 _data;
 } C_0;
-
 struct MaterialPayload_0
 {
     vec4 albedo_0;
@@ -32,15 +20,10 @@ struct MaterialPayload_0
 };
 
 layout(location = 0)
-rayPayloadNV MaterialPayload_0 p_0;
-
-layout(rgba32f)
-layout(binding = 1)
-uniform image2D gImage_0;
+rayPayloadNV
+MaterialPayload_0 p_0;
 
-void CallShader_0(
-    uint shaderIndex_1,
-    inout MaterialPayload_0 payload_0)
+void CallShader_0(uint shaderIndex_1, inout MaterialPayload_0 payload_0)
 {
     p_0 = payload_0;
     executeCallableNV(shaderIndex_1, (0));
@@ -48,30 +31,21 @@ void CallShader_0(
     return;
 }
 
+layout(rgba32f)
+layout(binding = 1)
+uniform image2D gImage_0;
+
 void main()
 {
     MaterialPayload_0 payload_1;
     payload_1.albedo_0 = vec4(0);
-
-    uvec3 tmp_launchid = gl_LaunchIDNV;
-    vec2 tmp_luanchidf = vec2(tmp_launchid.xy);
-
-    uvec3 tmp_launchsize = gl_LaunchSizeNV;
-    vec2 tmp_launchpos = tmp_luanchidf / vec2(tmp_launchsize.xy);
-
-    payload_1.uv_0 = tmp_launchpos;
-
-    uint tmp_shaderidx = C_0._data.shaderIndex_0;
-
-    MaterialPayload_0 tmp_payload;
-    tmp_payload = payload_1;
-    CallShader_0(tmp_shaderidx, tmp_payload);
-    payload_1 = tmp_payload;
-
-    uvec3 tmp_launchid2 = gl_LaunchIDNV;
-    imageStore(
-        gImage_0,
-        ivec2(tmp_launchid2.xy),
-        payload_1.albedo_0);
+    uvec3 _S2 = ((gl_LaunchIDNV));
+    vec2 _S3 = vec2(_S2.xy);
+    uvec3 _S4 = ((gl_LaunchSizeNV));
+    vec2 _S5 = _S3 / vec2(_S4.xy);
+    payload_1.uv_0 = _S5;
+    CallShader_0(C_0._data.shaderIndex_0, payload_1);
+    uvec3 _S6 = ((gl_LaunchIDNV));
+    imageStore((gImage_0), ivec2((_S6.xy)), payload_1.albedo_0);
     return;
 }
diff --git a/tests/vkray/intersection.slang.glsl b/tests/vkray/intersection.slang.glsl
index 66846d993..ac95432dd 100644
--- a/tests/vkray/intersection.slang.glsl
+++ b/tests/vkray/intersection.slang.glsl
@@ -1,19 +1,8 @@
 //TEST_IGNORE_FILE:
 #version 460
-
 #extension GL_NV_ray_tracing : require
-
-#define tmp_ubo _S1
-#define tmp_reportHit _S2
-#define tmp_origin _S3
-#define tmp_direction _S4
-#define tmp_tmin _S5
-#define tmp_tmax _S6
-#define tmp_thit _S7
-#define tmp_hitattrs _S8
-#define tmp_dithit _S9
-#define tmp_reportresult _S10
-
+layout(row_major) uniform;
+layout(row_major) buffer;
 struct Sphere_0
 {
     vec3 position_0;
@@ -26,12 +15,10 @@ struct SLANG_ParameterGroup_U_0
 };
 
 layout(binding = 0)
-layout(std140)
-uniform tmp_ubo
+layout(std140) uniform _S1
 {
     SLANG_ParameterGroup_U_0 _data;
 } U_0;
-
 struct RayDesc_0
 {
     vec3 Origin_0;
@@ -45,54 +32,43 @@ struct SphereHitAttributes_0
     vec3 normal_0;
 };
 
-bool rayIntersectsSphere_0(
-    RayDesc_0 ray_0,
-    Sphere_0 sphere_0,
-    out float                   tHit_0,
-    out SphereHitAttributes_0   attrs_0)
+bool rayIntersectsSphere_0(RayDesc_0 ray_0, Sphere_0 sphere_0, out float tHit_0, out SphereHitAttributes_0 attrs_0)
 {
     tHit_0 = sphere_0.radius_0;
     attrs_0.normal_0 = sphere_0.position_0;
     return tHit_0 >= ray_0.TMin_0;
 }
 
-hitAttributeNV SphereHitAttributes_0 a_0;
+hitAttributeNV
+SphereHitAttributes_0 a_0;
 
 bool ReportHit_0(float tHit_1, uint hitKind_0, SphereHitAttributes_0 attributes_0)
 {
     a_0 = attributes_0;
-    bool tmp_reportHit = reportIntersectionNV(tHit_1, hitKind_0);
-    return tmp_reportHit;
+    bool _S2 = reportIntersectionNV(tHit_1, hitKind_0);
+    return _S2;
 }
 
 void main()
 {
     RayDesc_0 ray_1;
-
-    vec3 tmp_origin = gl_ObjectRayOriginNV;
-    ray_1.Origin_0 = tmp_origin;
-
-    vec3 tmp_direction = gl_ObjectRayDirectionNV;
-    ray_1.Direction_0 = tmp_direction;
-
-    float tmp_tmin = gl_RayTminNV;
-    ray_1.TMin_0 = tmp_tmin;
-
-    float tmp_tmax = gl_RayTmaxNV;
-    ray_1.TMax_0 = tmp_tmax;
-
-    float tmp_thit;
-    SphereHitAttributes_0 tmp_hitattrs;
-    bool tmp_dithit = rayIntersectsSphere_0(ray_1, U_0._data.gSphere_0, tmp_thit, tmp_hitattrs);
-
-    float tHit_2 = tmp_thit;
-    SphereHitAttributes_0 attrs_1 = tmp_hitattrs;
-
-    if(tmp_dithit)
+    vec3 _S3 = ((gl_ObjectRayOriginNV));
+    ray_1.Origin_0 = _S3;
+    vec3 _S4 = ((gl_ObjectRayDirectionNV));
+    ray_1.Direction_0 = _S4;
+    float _S5 = ((gl_RayTminNV));
+    ray_1.TMin_0 = _S5;
+    float _S6 = ((gl_RayTmaxNV));
+    ray_1.TMax_0 = _S6;
+    float tHit_2;
+    SphereHitAttributes_0 attrs_1;
+    bool _S7 = rayIntersectsSphere_0(ray_1, U_0._data.gSphere_0, tHit_2, attrs_1);
+    if(_S7)
+    {
+        bool _S8 = ReportHit_0(tHit_2, uint(0), attrs_1);
+    }
+    else
     {
-        bool tmp_reportresult = ReportHit_0(tHit_2, (uint((0))), attrs_1);
     }
-
     return;
 }
-
diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl
index 79162be9f..f8b97973b 100644
--- a/tests/vkray/raygen.slang.glsl
+++ b/tests/vkray/raygen.slang.glsl
@@ -1,60 +1,16 @@
 //TEST_IGNORE_FILE:
 #version 460
-
-layout(row_major) uniform;
-
-#if USE_NV_RT
-#extension GL_NV_ray_tracing : require
-#define accelerationStructureEXT accelerationStructureNV
-#define callableDataInEXT callableDataInNV
-#define gl_LaunchIDEXT gl_LaunchIDNV
-#define hitAttributeEXT hitAttributeNV
-#define ignoreIntersectionEXT ignoreIntersectionNV
-#define rayPayloadInEXT rayPayloadInNV
-#define terminateRayEXT terminateRayNV
-#define traceRayEXT traceNV
-#else
 #extension GL_EXT_ray_tracing : require
-#endif
-
-#define TRACING_EPSILON 1e-6
-
-#define tmp_ubo             _S1
-#define tmp_saturate        _S2
-#define tmp_launchID_x      _S3
-#define tmp_add_x           _S4
-#define tmp_launchSize_x    _S5
-#define tmp_div_x           _S6
-#define tmp_launchID_y      _S7
-#define tmp_add_y           _S8
-#define tmp_launchSize_y    _S9
-#define tmp_div_y           _S10
-#define tmp_tex_pos         _S11
-#define tmp_tex_nrm         _S12
-#define tmp_light_invDist   _S13
-#define tmp_trace_A         _S14
-#define tmp_trace_B         _S15
-#define tmp_trace_C         _S16
-#define tmp_trace_D         _S17
-#define tmp_trace_E         _S18
-#define tmp_trace_ray       _S19
-#define tmp_trace_payload   _S20
-#define tmp_color           _S21
-#define tmp_dot             _S22
-#define tmp_sat             _S23
-#define tmp_trace2_A        _S24
-#define tmp_trace2_B        _S25
-#define tmp_trace2_C        _S26
-#define tmp_trace2_D        _S27
-#define tmp_trace2_E        _S28
-#define tmp_trace2_ray      _S39
-#define tmp_trace2_payload  _S30
-#define tmp_storeIdx        _S31
+layout(row_major) uniform;
+layout(row_major) buffer;
+layout(binding = 0)
+uniform texture2D samplerPosition_0;
 
+layout(binding = 2)
+uniform sampler sampler_0;
 
-layout(binding = 0) uniform texture2D samplerPosition_0;
-layout(binding = 2) uniform sampler sampler_0;
-layout(binding = 1) uniform texture2D samplerNormal_0;
+layout(binding = 1)
+uniform texture2D samplerNormal_0;
 
 struct Light_0
 {
@@ -62,8 +18,6 @@ struct Light_0
     vec4 color_0;
 };
 
-#define NUM_LIGHTS 17
-
 struct Uniforms_0
 {
     Light_0 light_0;
@@ -73,26 +27,18 @@ struct Uniforms_0
 };
 
 layout(binding = 3)
-layout(std140) uniform tmp_ubo
+layout(std140) uniform _S1
 {
     Uniforms_0 _data;
 } ubo_0;
-
-layout(binding = 5) uniform accelerationStructureEXT as_0;
-
 struct ShadowRay_0
 {
     float hitDistance_0;
 };
-layout(location = 0) rayPayloadEXT ShadowRay_0 p_0;
 
-struct ReflectionRay_0
-{
-    float color_1;
-};
-layout(location = 1) rayPayloadEXT ReflectionRay_0 p_1;
-
-layout(rgba32f) layout(binding = 4) uniform image2D outputImage_0;
+layout(location = 0)
+rayPayloadEXT
+ShadowRay_0 p_0;
 
 struct RayDesc_0
 {
@@ -102,141 +48,88 @@ struct RayDesc_0
     float TMax_0;
 };
 
-void TraceRay_0(
-    accelerationStructureEXT AccelerationStructure_0,
-    uint RayFlags_0,
-    uint InstanceInclusionMask_0,
-    uint RayContributionToHitGroupIndex_0,
-    uint MultiplierForGeometryContributionToHitGroupIndex_0,
-    uint MissShaderIndex_0,
-    RayDesc_0 Ray_0,
-    inout ShadowRay_0 Payload_0)
+void TraceRay_0(accelerationStructureEXT AccelerationStructure_0, uint RayFlags_0, uint InstanceInclusionMask_0, uint RayContributionToHitGroupIndex_0, uint MultiplierForGeometryContributionToHitGroupIndex_0, uint MissShaderIndex_0, RayDesc_0 Ray_0, inout ShadowRay_0 Payload_0)
 {
     p_0 = Payload_0;
-    traceRayEXT(
-        AccelerationStructure_0,
-        RayFlags_0,
-        InstanceInclusionMask_0,
-        RayContributionToHitGroupIndex_0,
-        MultiplierForGeometryContributionToHitGroupIndex_0,
-        MissShaderIndex_0,
-        Ray_0.Origin_0,
-        Ray_0.TMin_0,
-        Ray_0.Direction_0,
-        Ray_0.TMax_0,
-        0);
+    traceRayEXT(AccelerationStructure_0, RayFlags_0, InstanceInclusionMask_0, RayContributionToHitGroupIndex_0, MultiplierForGeometryContributionToHitGroupIndex_0, MissShaderIndex_0, Ray_0.Origin_0, Ray_0.TMin_0, Ray_0.Direction_0, Ray_0.TMax_0, (0));
     Payload_0 = p_0;
     return;
 }
 
-void TraceRay_1(
-    accelerationStructureEXT AccelerationStructure_1,
-    uint RayFlags_1,
-    uint InstanceInclusionMask_1,
-    uint RayContributionToHitGroupIndex_1,
-    uint MultiplierForGeometryContributionToHitGroupIndex_1,
-    uint MissShaderIndex_1,
-    RayDesc_0 Ray_1,
-    inout ReflectionRay_0 Payload_1)
+struct ReflectionRay_0
+{
+    float color_1;
+};
+
+layout(location = 1)
+rayPayloadEXT
+ReflectionRay_0 p_1;
+
+void TraceRay_1(accelerationStructureEXT AccelerationStructure_1, uint RayFlags_1, uint InstanceInclusionMask_1, uint RayContributionToHitGroupIndex_1, uint MultiplierForGeometryContributionToHitGroupIndex_1, uint MissShaderIndex_1, RayDesc_0 Ray_1, inout ReflectionRay_0 Payload_1)
 {
     p_1 = Payload_1;
-    traceRayEXT(
-        AccelerationStructure_1,
-        RayFlags_1,
-        InstanceInclusionMask_1,
-        RayContributionToHitGroupIndex_1,
-        MultiplierForGeometryContributionToHitGroupIndex_1,
-        MissShaderIndex_1,
-        Ray_1.Origin_0,
-        Ray_1.TMin_0,
-        Ray_1.Direction_0,
-        Ray_1.TMax_0,
-        1);
+    traceRayEXT(AccelerationStructure_1, RayFlags_1, InstanceInclusionMask_1, RayContributionToHitGroupIndex_1, MultiplierForGeometryContributionToHitGroupIndex_1, MissShaderIndex_1, Ray_1.Origin_0, Ray_1.TMin_0, Ray_1.Direction_0, Ray_1.TMax_0, (1));
     Payload_1 = p_1;
     return;
 }
 
+layout(binding = 5)
+uniform accelerationStructureEXT as_0;
+
 float saturate_0(float x_0)
 {
-    float tmp_saturate = clamp(x_0, float(0), float(1));
-    return tmp_saturate;
+    float _S2 = clamp(x_0, float(0), float(1));
+    return _S2;
 }
 
-void main() 
+layout(rgba32f)
+layout(binding = 4)
+uniform image2D outputImage_0;
+
+void main()
 {
     float atten_0;
-
-    uvec3 tmp_launchID_x = gl_LaunchIDEXT;
-    float tmp_add_x = float(tmp_launchID_x.x) + 0.5;
-    uvec3 tmp_launchSize_x = gl_LaunchSizeEXT;
-    float tmp_div_x = tmp_add_x / float(tmp_launchSize_x.x);
-
-    uvec3 tmp_launchID_y = gl_LaunchIDEXT;
-    float tmp_add_y = float(tmp_launchID_y.y) + 0.5;
-    uvec3 tmp_launchSize_y = gl_LaunchSizeEXT;
-    float tmp_div_y = tmp_add_y / float(tmp_launchSize_y.y);
-    vec2 inUV_0 = vec2(tmp_div_x, tmp_div_y);
-    
-    vec4 tmp_tex_pos = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0);
-    vec3 P_0 = tmp_tex_pos.xyz;
-
-    vec4 tmp_tex_nrm = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0);
-    vec3 N_0 = tmp_tex_nrm.xyz * 2.0 - 1.0;
-
+    uvec3 _S3 = ((gl_LaunchIDEXT));
+    float _S4 = float(_S3.x) + 0.50000000000000000000;
+    uvec3 _S5 = ((gl_LaunchSizeEXT));
+    float _S6 = _S4 / float(_S5.x);
+    uvec3 _S7 = ((gl_LaunchIDEXT));
+    float _S8 = float(_S7.y) + 0.50000000000000000000;
+    uvec3 _S9 = ((gl_LaunchSizeEXT));
+    float _S10 = _S8 / float(_S9.y);
+    vec2 inUV_0 = vec2(_S6, _S10);
+    vec4 _S11 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
+    vec3 P_0 = _S11.xyz;
+    vec4 _S12 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
+    vec3 N_0 = _S12.xyz * 2.00000000000000000000 - 1.00000000000000000000;
     vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0;
     float lightDist_0 = length(lightDelta_0);
     vec3 L_0 = normalize(lightDelta_0);
-
-    float tmp_light_invDist = 1.0 / (lightDist_0 * lightDist_0);
-
+    float _S13 = 1.00000000000000000000 / (lightDist_0 * lightDist_0);
     RayDesc_0 ray_0;
     ray_0.Origin_0 = P_0;
-    ray_0.TMin_0 = TRACING_EPSILON;
+    ray_0.TMin_0 = 0.00000100000000000000;
     ray_0.Direction_0 = lightDelta_0;
     ray_0.TMax_0 = lightDist_0;
-
     ShadowRay_0 shadowRay_0;
     shadowRay_0.hitDistance_0 = float(0);
-    const uint tmp_trace_A = uint(1);
-    const uint tmp_trace_B = uint(0xFF);
-    const uint tmp_trace_C = uint(0);
-    const uint tmp_trace_D = uint(0);
-    const uint tmp_trace_E = uint(2);
-
-    RayDesc_0 tmp_trace_ray = ray_0;
-    ShadowRay_0 tmp_trace_payload;
-    tmp_trace_payload = shadowRay_0;
-    TraceRay_0(as_0, tmp_trace_A, tmp_trace_B, tmp_trace_C, tmp_trace_D, tmp_trace_E, tmp_trace_ray, tmp_trace_payload);
-    shadowRay_0 = tmp_trace_payload;
-
-    ReflectionRay_0 reflectionRay_0;
+    TraceRay_0(as_0, uint(1), uint(255), uint(0), uint(0), uint(2), ray_0, shadowRay_0);
     if(shadowRay_0.hitDistance_0 < lightDist_0)
     {
-        atten_0 = (0.00000000000000000000);
+        atten_0 = 0.00000000000000000000;
     }
     else
     {
-        atten_0 = tmp_light_invDist;
+        atten_0 = _S13;
     }
-
-    vec3 tmp_color = ubo_0._data.light_0.color_0.xyz;
-    float tmp_dot = dot(N_0, L_0);
-    float tmp_sat = saturate_0(tmp_dot);
-    vec3 color_2 = (tmp_color * tmp_sat) * atten_0;
-
-    const uint tmp_trace2_A = uint(1);
-    const uint tmp_trace2_B = uint(255);
-    const uint tmp_trace2_C = uint(0);
-    const uint tmp_trace2_D = uint(0);
-    const uint tmp_trace2_E = uint(2);
-    RayDesc_0 tmp_trace2_ray = ray_0;
-    ReflectionRay_0 tmp_trace2_payload;
-    tmp_trace2_payload = reflectionRay_0;
-    TraceRay_1(as_0, tmp_trace2_A, tmp_trace2_B, tmp_trace2_C, tmp_trace2_D, tmp_trace2_E, tmp_trace2_ray, tmp_trace2_payload);
-
-    vec3 color_3 = color_2 + tmp_trace2_payload.color_1;
-
-    uvec3 tmp_storeIdx = gl_LaunchIDEXT;
-    imageStore(outputImage_0, ivec2(uvec2(ivec2(tmp_storeIdx.xy))), vec4(color_3, 1.0));
+    vec3 _S14 = ubo_0._data.light_0.color_0.xyz;
+    float _S15 = dot(N_0, L_0);
+    float _S16 = saturate_0(_S15);
+    vec3 color_2 = _S14 * _S16 * atten_0;
+    ReflectionRay_0 reflectionRay_0;
+    TraceRay_1(as_0, uint(1), uint(255), uint(0), uint(0), uint(2), ray_0, reflectionRay_0);
+    vec3 color_3 = color_2 + reflectionRay_0.color_1;
+    uvec3 _S17 = ((gl_LaunchIDEXT));
+    imageStore((outputImage_0), ivec2((uvec2(ivec2(_S17.xy)))), vec4(color_3, 1.00000000000000000000));
     return;
 }