From 19c7c371aaef9dc537f6a6ed8cbfd77355f219ff Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Mon, 6 Nov 2017 14:05:32 -0800
Subject: Remove `__intrinsic_op` from many decls

The `__intrinsic_op` attribute used to be how we marked ops for special handling in emission, but it is now used to mark ops that map to single instructions. Either way, we have a bunch of intrinsic functions that need to get lowered in a more traditional fashion for HLSL, and the `__intrinsic_op` markers on those declarations are getting in the way.

Subsequent changes will fix up issues created by this removal.

A few cases were left unchanged, either because the ops really do map to single instructions, or because there is some special-case support attached to those operations that would be tricky to replace right now.
---
 source/slang/core.meta.slang   |  27 +-
 source/slang/core.meta.slang.h |  27 +-
 source/slang/hlsl.meta.slang   | 760 ++++++++++++++++++++---------------------
 source/slang/hlsl.meta.slang.h | 760 ++++++++++++++++++++---------------------
 4 files changed, 750 insertions(+), 824 deletions(-)

(limited to 'source')

diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 8accc83a4..f36b53227 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -444,13 +444,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
 
                     sb << ")\")\n";
-
-
-					// TIM: Making `GetDimensions` *not* be marked as
-					// an intrinsic, just so we can see how defining
-					// things as `extern` functions would work.
-//                    sb << "__intrinsic_op\n";
-
                 }
 
                 char const* t = isFloat ? "out float " : "out uint ";
@@ -524,7 +517,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 {
                     sb << "__target_intrinsic(glsl, \"texelFetch($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -541,7 +533,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 {
                     sb << "__target_intrinsic(glsl, \"texelFetch($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -569,7 +560,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 				// this should have both `get` and `set` accessors.
 
                 // subscript operator
-                sb << "__intrinsic_op __subscript(uint";
+                sb << "__subscript(uint";
 				if(kBaseTextureTypes[tt].coordCount + isArray > 1)
 				{
 					sb << kBaseTextureTypes[tt].coordCount + isArray;
@@ -584,16 +575,14 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 sb << "__target_intrinsic(glsl, \"texture($$p, $1)\")\n";
 
                 // TODO: only enable if IR is being used?
-                sb << "__intrinsic_op(sample)\n";
+//                sb << "__intrinsic_op(sample)\n";
 
-                sb << "__intrinsic_op\n";
                 sb << "T Sample(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
 
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T Sample(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -618,14 +607,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
                 // `SampleBias()`
                 sb << "__target_intrinsic(glsl, \"texture($$p, $1, $2)\")\n";
-                sb << "__intrinsic_op\n";
                 sb << "T SampleBias(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
 
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2, $3)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T SampleBias(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -678,7 +665,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     sb << ", vec" << baseCoordCount << "(0.0)";
                     sb << ")\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float compareValue";
@@ -706,7 +692,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
 
                 sb << "__target_intrinsic(glsl, \"textureGrad($$p, $1, $2, $3)\")\n";
-                sb << "__intrinsic_op(sampleGrad)\n";
+//                sb << "__intrinsic_op(sampleGrad)\n";
                 sb << "T SampleGrad(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
@@ -716,7 +702,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureGradOffset($$p, $1, $2, $3, $4)\")\n";
-                    sb << "__intrinsic_op(sampleGrad)\n";
+//                    sb << "__intrinsic_op(sampleGrad)\n";
                     sb << "T SampleGrad(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
@@ -727,7 +713,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 // `SampleLevel`
 
                 sb << "__target_intrinsic(glsl, \"textureLod($$p, $1, $2)\")\n";
-                sb << "__intrinsic_op\n";
                 sb << "T SampleLevel(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float level);\n";
@@ -735,7 +720,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureLodOffset($$p, $1, $2, $3)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T SampleLevel(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "float level, ";
@@ -803,13 +787,11 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     EMIT_LINE_DIRECTIVE();
                             
                     sb << "__target_intrinsic(glsl, \"textureGather($$p, $1, " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
 
                     EMIT_LINE_DIRECTIVE();
                     sb << "__target_intrinsic(glsl, \"textureGatherOffset($$p, $1, $2, " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -822,7 +804,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
                     EMIT_LINE_DIRECTIVE();
                     sb << "__target_intrinsic(glsl, \"textureGatherOffsets($$p, $1, int" << kBaseTextureTypes[tt].coordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index 6b60d2896..1c0f28b26 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -447,13 +447,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
 
                     sb << ")\")\n";
-
-
-					// TIM: Making `GetDimensions` *not* be marked as
-					// an intrinsic, just so we can see how defining
-					// things as `extern` functions would work.
-//                    sb << "__intrinsic_op\n";
-
                 }
 
                 char const* t = isFloat ? "out float " : "out uint ";
@@ -527,7 +520,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 {
                     sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -544,7 +536,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 {
                     sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T Load(";
                 sb << "int" << loadCoordCount << " location";
                 if(isMultisample)
@@ -572,7 +563,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 				// this should have both `get` and `set` accessors.
 
                 // subscript operator
-                sb << "__intrinsic_op __subscript(uint";
+                sb << "__subscript(uint";
 				if(kBaseTextureTypes[tt].coordCount + isArray > 1)
 				{
 					sb << kBaseTextureTypes[tt].coordCount + isArray;
@@ -587,16 +578,14 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 sb << "__target_intrinsic(glsl, \"texture($p, $1)\")\n";
 
                 // TODO: only enable if IR is being used?
-                sb << "__intrinsic_op(sample)\n";
+//                sb << "__intrinsic_op(sample)\n";
 
-                sb << "__intrinsic_op\n";
                 sb << "T Sample(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
 
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T Sample(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -621,14 +610,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
                 // `SampleBias()`
                 sb << "__target_intrinsic(glsl, \"texture($p, $1, $2)\")\n";
-                sb << "__intrinsic_op\n";
                 sb << "T SampleBias(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
 
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2, $3)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T SampleBias(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -681,7 +668,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     sb << ", vec" << baseCoordCount << "(0.0)";
                     sb << ")\")\n";
                 }
-                sb << "__intrinsic_op\n";
                 sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float compareValue";
@@ -709,7 +695,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
 
                 sb << "__target_intrinsic(glsl, \"textureGrad($p, $1, $2, $3)\")\n";
-                sb << "__intrinsic_op(sampleGrad)\n";
+//                sb << "__intrinsic_op(sampleGrad)\n";
                 sb << "T SampleGrad(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
@@ -719,7 +705,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureGradOffset($p, $1, $2, $3, $4)\")\n";
-                    sb << "__intrinsic_op(sampleGrad)\n";
+//                    sb << "__intrinsic_op(sampleGrad)\n";
                     sb << "T SampleGrad(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
@@ -730,7 +716,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 // `SampleLevel`
 
                 sb << "__target_intrinsic(glsl, \"textureLod($p, $1, $2)\")\n";
-                sb << "__intrinsic_op\n";
                 sb << "T SampleLevel(SamplerState s, ";
                 sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                 sb << "float level);\n";
@@ -738,7 +723,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                 if( baseShape != TextureType::ShapeCube )
                 {
                     sb << "__target_intrinsic(glsl, \"textureLodOffset($p, $1, $2, $3)\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "T SampleLevel(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
                     sb << "float level, ";
@@ -806,13 +790,11 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                     EMIT_LINE_DIRECTIVE();
                             
                     sb << "__target_intrinsic(glsl, \"textureGather($p, $1, " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
 
                     EMIT_LINE_DIRECTIVE();
                     sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $1, $2, " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
@@ -825,7 +807,6 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
 
                     EMIT_LINE_DIRECTIVE();
                     sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $1, int" << kBaseTextureTypes[tt].coordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
-                    sb << "__intrinsic_op\n";
                     sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
                     sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
                     sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index dc1d4d8e8..cdf720006 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -4,29 +4,29 @@ typedef uint UINT;
 
 __generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
 {
-    __intrinsic_op void Append(T value);
+    void Append(T value);
 
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint numStructs,
         out uint stride);
 };
 
 __magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
 {
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint dim);
 
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
+    uint Load(int location);
+    uint Load(int location, out uint status);
 
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
+    uint2 Load2(int location);
+    uint2 Load2(int location, out uint status);
 
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
+    uint3 Load3(int location);
+    uint3 Load3(int location, out uint status);
 
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
+    uint4 Load4(int location);
+    uint4 Load4(int location, out uint status);
 };
 
 __generic<T>
@@ -37,12 +37,12 @@ __intrinsic_type(${{
 }})
 struct StructuredBuffer
 {
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint numStructs,
         out uint stride);
 
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
+    T Load(int location);
+    T Load(int location, out uint status);
 
     __intrinsic_op(bufferLoad)
     __subscript(uint index) -> T;
@@ -50,21 +50,21 @@ struct StructuredBuffer
 
 __generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
 {
-    __intrinsic_op T Consume();
+    T Consume();
 
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint numStructs,
         out uint stride);
 };
 
 __generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch
 {
-    __intrinsic_op __subscript(uint index) -> T;
+    __subscript(uint index) -> T;
 };
 
 __generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch
 {
-    __intrinsic_op __subscript(uint index) -> T { set; }
+    __subscript(uint index) -> T { set; }
 };
 
 __magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
@@ -72,110 +72,110 @@ __magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
     // Note(tfoley): supports alll operations from `ByteAddressBuffer`
     // TODO(tfoley): can this be made a sub-type?
 
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint dim);
 
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
+    uint Load(int location);
+    uint Load(int location, out uint status);
 
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
+    uint2 Load2(int location);
+    uint2 Load2(int location, out uint status);
 
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
+    uint3 Load3(int location);
+    uint3 Load3(int location, out uint status);
 
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
+    uint4 Load4(int location);
+    uint4 Load4(int location, out uint status);
 
     // Added operations:
 
-    __intrinsic_op void InterlockedAdd(
+    void InterlockedAdd(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedAdd(
+    void InterlockedAdd(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedAnd(
+    void InterlockedAnd(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedAnd(
+    void InterlockedAnd(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedCompareExchange(
+    void InterlockedCompareExchange(
         UINT dest,
         UINT compare_value,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedCompareExchange(
+    void InterlockedCompareExchange(
         UINT dest,
         UINT compare_value,
         UINT value);
 
-    __intrinsic_op void InterlockedCompareStore(
+    void InterlockedCompareStore(
         UINT dest,
         UINT compare_value,
         UINT value);
-    __intrinsic_op void InterlockedCompareStore(
+    void InterlockedCompareStore(
         UINT dest,
         UINT compare_value);
 
-    __intrinsic_op void InterlockedExchange(
+    void InterlockedExchange(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedExchange(
+    void InterlockedExchange(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedMax(
+    void InterlockedMax(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedMax(
+    void InterlockedMax(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedMin(
+    void InterlockedMin(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedMin(
+    void InterlockedMin(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedOr(
+    void InterlockedOr(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedOr(
+    void InterlockedOr(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void InterlockedXor(
+    void InterlockedXor(
         UINT dest,
         UINT value,
         out UINT original_value);
-    __intrinsic_op void InterlockedXor(
+    void InterlockedXor(
         UINT dest,
         UINT value);
 
-    __intrinsic_op void Store(
+    void Store(
         uint address,
         uint value);
 
-    __intrinsic_op void Store2(
+    void Store2(
         uint address,
         uint2 value);
 
-    __intrinsic_op void Store3(
+    void Store3(
         uint address,
         uint3 value);
 
-    __intrinsic_op void Store4(
+    void Store4(
         uint address,
         uint4 value);
 };
@@ -188,18 +188,17 @@ __intrinsic_type(${{
 }})
 struct RWStructuredBuffer
 {
-    __intrinsic_op uint DecrementCounter();
+    uint DecrementCounter();
 
-    __intrinsic_op void GetDimensions(
+    void GetDimensions(
         out uint numStructs,
         out uint stride);
 
-    __intrinsic_op uint IncrementCounter();
+    uint IncrementCounter();
 
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
+    T Load(int location);
+    T Load(int location, out uint status);
 
-	__intrinsic_op
 	__subscript(uint index) -> T
 	{
 		__intrinsic_op(bufferLoad)
@@ -231,99 +230,96 @@ __generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream
 // Note(tfoley): Trying to systematically add all the HLSL builtins
 
 // Try to terminate the current draw or dispatch call (HLSL SM 4.0)
-__intrinsic_op void abort();
+void abort();
 
 // Absolute value (HLSL SM 1.0)
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);
+__generic<T : __BuiltinSignedArithmeticType> T abs(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> vector<T,N> abs(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> matrix<T,N,M> abs(matrix<T,N,M> x);
 
 // Inverse cosine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T acos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> acos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> acos(matrix<T,N,M> x);
 
 // Test if all components are non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T all(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> all(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> all(matrix<T,N,M> x);
+__generic<T : __BuiltinType> T all(T x);
+__generic<T : __BuiltinType, let N : int> vector<T,N> all(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> all(matrix<T,N,M> x);
 
 // Barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrier();
+void AllMemoryBarrier();
 
 // Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrierWithGroupSync();
+void AllMemoryBarrierWithGroupSync();
 
 // Test if any components is non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T any(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> any(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> any(matrix<T,N,M> x);
+__generic<T : __BuiltinType> T any(T x);
+__generic<T : __BuiltinType, let N : int> vector<T,N> any(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> any(matrix<T,N,M> x);
 
 
 // Reinterpret bits as a double (HLSL SM 5.0)
-__intrinsic_op double asdouble(uint lowbits, uint highbits);
+double asdouble(uint lowbits, uint highbits);
 
 // Reinterpret bits as a float (HLSL SM 4.0)
-__intrinsic_op float asfloat( int x);
-__intrinsic_op float asfloat(uint x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);
+float asfloat( int x);
+float asfloat(uint x);
+__generic<let N : int> vector<float,N> asfloat(vector< int,N> x);
+__generic<let N : int> vector<float,N> asfloat(vector<uint,N> x);
+__generic<let N : int, let M : int> matrix<float,N,M> asfloat(matrix< int,N,M> x);
+__generic<let N : int, let M : int> matrix<float,N,M> asfloat(matrix<uint,N,M> x);
 
 
 // Inverse sine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T asin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> asin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> asin(matrix<T,N,M> x);
 
 // Reinterpret bits as an int (HLSL SM 4.0)
-__intrinsic_op int asint(float x);
-__intrinsic_op int asint(uint x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);
+int asint(float x);
+int asint(uint x);
+__generic<let N : int> vector<int,N> asint(vector<float,N> x);
+__generic<let N : int> vector<int,N> asint(vector<uint,N> x);
+__generic<let N : int, let M : int> matrix<int,N,M> asint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> matrix<int,N,M> asint(matrix<uint,N,M> x);
 
 // Reinterpret bits of double as a uint (HLSL SM 5.0)
-__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);
+void asuint(double value, out uint lowbits, out uint highbits);
 
 // Reinterpret bits as a uint (HLSL SM 4.0)
-__intrinsic_op uint asuint(float x);
-__intrinsic_op uint asuint(int x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);
+uint asuint(float x);
+uint asuint(int x);
+__generic<let N : int> vector<uint,N> asuint(vector<float,N> x);
+__generic<let N : int> vector<uint,N> asuint(vector<int,N> x);
+__generic<let N : int, let M : int> matrix<uint,N,M> asuint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> matrix<uint,N,M> asuint(matrix<int,N,M> x);
 
 // Inverse tangent (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T atan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> atan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> atan(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
 T atan2(T y, T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
 vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
 matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
 
 // Ceiling (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T ceil(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> ceil(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> ceil(matrix<T,N,M> x);
 
 
 // Check access status to tiled resource
-__intrinsic_op bool CheckAccessFullyMapped(uint status);
+bool CheckAccessFullyMapped(uint status);
 
 // Clamp (HLSL SM 1.0)
 __generic<T : __BuiltinArithmeticType> T clamp(T x, T min, T max);
@@ -331,9 +327,9 @@ __generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> clamp(vector<T,N
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
 
 // Clip (discard) fragment conditionally
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> void clip(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> void clip(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> void clip(matrix<T,N,M> x);
 
 // Cosine
 __generic<T : __BuiltinFloatingPointType> T cos(T x);
@@ -341,360 +337,348 @@ __generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> cos(vector<T,
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> cos(matrix<T,N,M> x);
 
 // Hyperbolic cosine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T cosh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> cosh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> cosh(matrix<T,N,M> x);
 
 // Population count
-__intrinsic_op uint countbits(uint value);
+uint countbits(uint value);
 
 // Cross product
-__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);
+__generic<T : __BuiltinArithmeticType> vector<T,3> cross(vector<T,3> x, vector<T,3> y); // declared for 3-component vectors only
 
 // Convert encoded color
-__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);
+int4 D3DCOLORtoUBYTE4(float4 x);
 
 // Partial-difference derivatives
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(glsl, dFdx)
-__intrinsic_op
 T ddx(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(glsl, dFdx)
-__intrinsic_op
 vector<T,N> ddx(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __target_intrinsic(glsl, dFdx)
-__intrinsic_op
 matrix<T,N,M> ddx(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
 T ddx_coarse(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
 vector<T,N> ddx_coarse(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
 matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
 T ddx_fine(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
 vector<T,N> ddx_fine(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
 matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(glsl, dFdy)
-__intrinsic_op
 T ddy(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(glsl, dFdy)
-__intrinsic_op
 vector<T,N> ddy(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __target_intrinsic(glsl, dFdy)
-__intrinsic_op
  matrix<T,N,M> ddy(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
 T ddy_coarse(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
 vector<T,N> ddy_coarse(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
 matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
 
 __generic<T : __BuiltinFloatingPointType>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
 T ddy_fine(T x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
 vector<T,N> ddy_fine(vector<T,N> x);
+
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __glsl_extension(GL_ARB_derivative_control)
 __target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
 matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
 
 
 // Radians to degrees
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T degrees(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> degrees(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> degrees(matrix<T,N,M> x);
 
 // Matrix determinant
 
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);
+__generic<T : __BuiltinFloatingPointType, let N : int> T determinant(matrix<T,N,N> m); // square matrices only: both dimensions are N; result is a scalar T
 
 // Barrier for device memory
-__intrinsic_op void DeviceMemoryBarrier();
-__intrinsic_op void DeviceMemoryBarrierWithGroupSync();
+void DeviceMemoryBarrier();
+void DeviceMemoryBarrierWithGroupSync();
 
 // Vector distance
 
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int> T distance(vector<T,N> x, vector<T,N> y);
 
 // Vector dot product
 
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int> T dot(vector<T,N> x, vector<T,N> y);
 
 // Helper for computing distance terms for lighting (obsolete)
 
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);
+__generic<T : __BuiltinFloatingPointType> vector<T,4> dst(vector<T,4> x, vector<T,4> y); // operands and result are fixed at 4 components
 
 // Error message
 
-// __intrinsic_op void errorf( string format, ... );
+// void errorf( string format, ... );
 
 // Attribute evaluation
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
+__generic<T : __BuiltinArithmeticType> T EvaluateAttributeAtCentroid(T x);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType> T EvaluateAttributeAtSample(T x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
+__generic<T : __BuiltinArithmeticType> T EvaluateAttributeSnapped(T x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
 
 // Base-e exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T exp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> exp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> exp(matrix<T,N,M> x);
 
 // Base-2 exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T exp2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> exp2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> exp2(matrix<T,N,M> x);
 
 // Convert 16-bit float stored in low bits of integer
-__intrinsic_op float f16tof32(uint value);
-__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);
+float f16tof32(uint value);
+__generic<let N : int> vector<float,N> f16tof32(vector<uint,N> value);
 
 // Convert to 16-bit float stored in low bits of integer
-__intrinsic_op uint f32tof16(float value);
-__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);
+uint f32tof16(float value);
+__generic<let N : int> vector<uint,N> f32tof16(vector<float,N> value);
 
 // Flip surface normal to face forward, if needed
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
 
 // Find first set bit starting at high bit and working down
-__intrinsic_op int firstbithigh(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);
+int firstbithigh(int value);
+__generic<let N : int> vector<int,N> firstbithigh(vector<int,N> value);
 
-__intrinsic_op uint firstbithigh(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);
+uint firstbithigh(uint value);
+__generic<let N : int> vector<uint,N> firstbithigh(vector<uint,N> value);
 
 // Find first set bit starting at low bit and working up
-__intrinsic_op int firstbitlow(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);
+int firstbitlow(int value);
+__generic<let N : int> vector<int,N> firstbitlow(vector<int,N> value);
 
-__intrinsic_op uint firstbitlow(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);
+uint firstbitlow(uint value);
+__generic<let N : int> vector<uint,N> firstbitlow(vector<uint,N> value);
 
 // Floor (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T floor(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> floor(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> floor(matrix<T,N,M> x);
 
 // Fused multiply-add for doubles
-__intrinsic_op double fma(double a, double b, double c);
-__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
-__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
+double fma(double a, double b, double c);
+__generic<let N : int> vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
+__generic<let N : int, let M : int> matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
 
 // Floating point remainder of x/y
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
+__generic<T : __BuiltinFloatingPointType> T fmod(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
 
 // Fractional part
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(glsl, fract)
-__intrinsic_op
 T frac(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(glsl, fract)
-__intrinsic_op
 vector<T,N> frac(vector<T,N> x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __target_intrinsic(glsl, fract)
-__intrinsic_op
 matrix<T,N,M> frac(matrix<T,N,M> x);
 
 // Split float into mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
+__generic<T : __BuiltinFloatingPointType> T frexp(T x, out T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
 
 // Texture filter width
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T fwidth(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> fwidth(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> fwidth(matrix<T,N,M> x);
 
 // Get number of samples in render target
-__intrinsic_op uint GetRenderTargetSampleCount();
+uint GetRenderTargetSampleCount();
 
 // Get position of given sample
-__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);
+float2 GetRenderTargetSamplePosition(int Index);
 
 // Group memory barrier
-__intrinsic_op void GroupMemoryBarrier();
-__intrinsic_op void GroupMemoryBarrierWithGroupSync();
+void GroupMemoryBarrier();
+void GroupMemoryBarrierWithGroupSync();
 
 // Atomics
-__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);
+void InterlockedAdd(in out  int dest,  int value, out  int original_value);
+void InterlockedAdd(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);
+void InterlockedAnd(in out  int dest,  int value, out  int original_value);
+void InterlockedAnd(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
-__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
+void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
+void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);
-__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
+void InterlockedCompareStore(in out  int dest,  int compare_value,  int value); // signed overload; no `out original_value` parameter, unlike InterlockedCompareExchange
+void InterlockedCompareStore(in out uint dest, uint compare_value, uint value); // unsigned overload; same parameter shape as the signed one
 
-__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);
+void InterlockedExchange(in out  int dest,  int value, out  int original_value);
+void InterlockedExchange(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);
+void InterlockedMax(in out  int dest,  int value, out  int original_value);
+void InterlockedMax(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);
+void InterlockedMin(in out  int dest,  int value, out  int original_value);
+void InterlockedMin(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);
+void InterlockedOr(in out  int dest,  int value, out  int original_value);
+void InterlockedOr(in out uint dest, uint value, out uint original_value);
 
-__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);
+void InterlockedXor(in out  int dest,  int value, out  int original_value);
+void InterlockedXor(in out uint dest, uint value, out uint original_value);
 
 // Is floating-point value finite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> bool isfinite(T x); // scalar: one bool result
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isfinite(vector<T,N> x); // vector: bool vector with the same N as the argument
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isfinite(matrix<T,N,M> x); // matrix: bool matrix with the same N and M as the argument
 
 // Is floating-point value infinite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> bool isinf(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isinf(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isinf(matrix<T,N,M> x);
 
 // Is floating-point value not-a-number?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> bool isnan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isnan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isnan(matrix<T,N,M> x);
 
 // Construct float from mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
+__generic<T : __BuiltinFloatingPointType> T ldexp(T x, T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
 
 // Vector length
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int> T length(vector<T,N> x);
 
 // Linear interpolation
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(glsl, mix)
-__intrinsic_op
 T lerp(T x, T y, T s);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(glsl, mix)
-__intrinsic_op
 vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
 
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
 __target_intrinsic(glsl, mix)
-__intrinsic_op
 matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
 
 // Legacy lighting function (obsolete)
-__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);
+float4 lit(float n_dot_l, float n_dot_h, float m);
 
 // Base-e logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T log(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log(matrix<T,N,M> x);
 
 // Base-10 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T log10(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log10(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log10(matrix<T,N,M> x);
 
 // Base-2 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T log2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log2(matrix<T,N,M> x);
 
 // multiply-add
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
+__generic<T : __BuiltinArithmeticType> T mad(T mvalue, T avalue, T bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
 
 // maximum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
+__generic<T : __BuiltinArithmeticType> T max(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> max(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
 
 // minimum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
+__generic<T : __BuiltinArithmeticType> T min(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> min(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
 
 // split into integer and fractional parts (both with same sign)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
+__generic<T : __BuiltinFloatingPointType> T modf(T x, out T ip);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
 
 // msad4 (whatever that is)
-__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);
+uint4 msad4(uint reference, uint2 source, uint4 accum);
 
 // General inner products
 
 // scalar-scalar
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);
+__generic<T : __BuiltinArithmeticType> T mul(T x, T y);
 
 // scalar-vector and vector-scalar
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(vector<T,N> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(T x, vector<T,N> y);
 
 // scalar-matrix and matrix-scalar
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(matrix<T,N,M> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(T x, matrix<T,N,M> y);
 
 // vector-vector (dot product)
 __generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);
@@ -709,86 +693,86 @@ __generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(
 __generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
 
 // noise (deprecated)
-__intrinsic_op float noise(float x);
-__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);
+float noise(float x); // scalar overload; float only (not generic over the element type)
+__generic<let N : int> float noise(vector<float, N> x); // vector overload: result is still a scalar float for any width N
 
 // Normalize a vector
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> normalize(vector<T,N> x);
 
 // Raise to a power
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
+__generic<T : __BuiltinFloatingPointType> T pow(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> pow(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
 
 // Output message
 
-// __intrinsic_op void printf( string format, ... );
+// void printf( string format, ... );
 
 // Tessellation factor fixup routines
 
-__intrinsic_op void Process2DQuadTessFactorsAvg(
+void Process2DQuadTessFactorsAvg(
     in  float4 RawEdgeFactors,
     in  float2 InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void Process2DQuadTessFactorsMax(
+void Process2DQuadTessFactorsMax(
     in  float4 RawEdgeFactors,
     in  float2 InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void Process2DQuadTessFactorsMin(
+void Process2DQuadTessFactorsMin(
     in  float4 RawEdgeFactors,
     in  float2 InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void ProcessIsolineTessFactors(
+void ProcessIsolineTessFactors(
     in  float RawDetailFactor,
     in  float RawDensityFactor,
     out float RoundedDetailFactor,
     out float RoundedDensityFactor);
 
-__intrinsic_op void ProcessQuadTessFactorsAvg(
+void ProcessQuadTessFactorsAvg(
     in  float4 RawEdgeFactors,
     in  float InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void ProcessQuadTessFactorsMax(
+void ProcessQuadTessFactorsMax(
     in  float4 RawEdgeFactors,
     in  float InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void ProcessQuadTessFactorsMin(
+void ProcessQuadTessFactorsMin(
     in  float4 RawEdgeFactors,
     in  float InsideScale,
     out float4 RoundedEdgeTessFactors,
     out float2 RoundedInsideTessFactors,
     out float2 UnroundedInsideTessFactors);
 
-__intrinsic_op void ProcessTriTessFactorsAvg(
+void ProcessTriTessFactorsAvg(
     in  float3 RawEdgeFactors,
     in  float InsideScale,
     out float3 RoundedEdgeTessFactors,
     out float RoundedInsideTessFactor,
     out float UnroundedInsideTessFactor);
 
-__intrinsic_op void ProcessTriTessFactorsMax(
+void ProcessTriTessFactorsMax(
     in  float3 RawEdgeFactors,
     in  float InsideScale,
     out float3 RoundedEdgeTessFactors,
     out float RoundedInsideTessFactor,
     out float UnroundedInsideTessFactor);
 
-__intrinsic_op void ProcessTriTessFactorsMin(
+void ProcessTriTessFactorsMin(
     in  float3 RawEdgeFactors,
     in  float InsideScale,
     out float3 RoundedEdgeTessFactors,
@@ -796,38 +780,36 @@ __intrinsic_op void ProcessTriTessFactorsMin(
     out float UnroundedInsideTessFactors);
 
 // Degrees to radians
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T radians(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> radians(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> radians(matrix<T,N,M> x);
 
 // Approximate reciprocal
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T rcp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> rcp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> rcp(matrix<T,N,M> x);
 
 // Reflect incident vector across plane with given normal
 __generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
 vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
 
 // Refract incident vector given surface normal and index of refraction
 __generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
 vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
 
 // Reverse order of bits
-__intrinsic_op uint reversebits(uint value);
+uint reversebits(uint value);
 __generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value);
 
 // Round-to-nearest
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T round(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> round(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> round(matrix<T,N,M> x);
 
 // Reciprocal of square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T rsqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> rsqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> rsqrt(matrix<T,N,M> x);
 
 // Clamp value to [0,1] range
 __generic<T : __BuiltinFloatingPointType>
@@ -875,9 +857,9 @@ matrix<T,N,M> saturate(matrix<T,N,M> x)
 
 
 // Extract sign of value
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);
+__generic<T : __BuiltinSignedArithmeticType> int sign(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> vector<int,N> sign(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> matrix<int,N,M> sign(matrix<T,N,M> x);
 
 
 // Sine
@@ -886,127 +868,127 @@ __generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sin(vector<T,
 __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sin(matrix<T,N,M> x);
 
 // Sine and cosine
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(T x, out T s, out T c);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
+__generic<T : __BuiltinFloatingPointType, let N : int> void sincos(T x, out T s, out T c);
+__generic<T : __BuiltinFloatingPointType, let N : int> void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
 
 // Hyperbolic Sine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T sinh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sinh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sinh(matrix<T,N,M> x);
 
 // Smooth step (Hermite interpolation)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T smoothstep(T min, T max, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
 
 // Square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T sqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sqrt(matrix<T,N,M> x);
 
 // Step function
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T step(T y, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> step(vector<T,N> y, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
 
 // Tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T tan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> tan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> tan(matrix<T,N,M> x);
 
 // Hyperbolic tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T tanh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> tanh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> tanh(matrix<T,N,M> x);
 
 // Legacy texture-fetch operations
 
 /*
-__intrinsic_op float4 tex1D(sampler1D s, float t);
-__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);
-
-__intrinsic_op float4 tex2D(sampler2D s, float2 t);
-__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);
-
-__intrinsic_op float4 tex3D(sampler3D s, float3 t);
-__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);
-
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);
+float4 tex1D(sampler1D s, float t);
+float4 tex1D(sampler1D s, float t, float ddx, float ddy);
+float4 tex1Dbias(sampler1D s, float4 t);
+float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
+float4 tex1Dlod(sampler1D s, float4 t);
+float4 tex1Dproj(sampler1D s, float4 t);
+
+float4 tex2D(sampler2D s, float2 t);
+float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
+float4 tex2Dbias(sampler2D s, float4 t);
+float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
+float4 tex2Dlod(sampler2D s, float4 t);
+float4 tex2Dproj(sampler2D s, float4 t);
+
+float4 tex3D(sampler3D s, float3 t);
+float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
+float4 tex3Dbias(sampler3D s, float4 t);
+float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
+float4 tex3Dlod(sampler3D s, float4 t);
+float4 tex3Dproj(sampler3D s, float4 t);
+
+float4 texCUBE(samplerCUBE s, float3 t);
+float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+float4 texCUBEbias(samplerCUBE s, float4 t);
+float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+float4 texCUBElod(samplerCUBE s, float4 t);
+float4 texCUBEproj(samplerCUBE s, float4 t);
 */
 
 // Matrix transpose
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,M,N> transpose(matrix<T,N,M> x);
 
 // Truncate to integer
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);
+__generic<T : __BuiltinFloatingPointType> T trunc(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> trunc(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> trunc(matrix<T,N,M> x);
 
 // Shader model 6.0 stuff
 
-__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
+uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
 
-__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
+__generic<T : __BuiltinType> T QuadReadLaneAt(T sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int> vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
 
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
+__generic<T : __BuiltinType> T QuadSwapX(T localValue);
+__generic<T : __BuiltinType, let N : int> vector<T,N> QuadSwapX(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
 
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
+__generic<T : __BuiltinType> T QuadSwapY(T localValue);
+__generic<T : __BuiltinType, let N : int> vector<T,N> QuadSwapY(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
 
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
+__generic<T : __BuiltinIntegerType> T WaveAllBitAnd(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitAnd(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
+__generic<T : __BuiltinIntegerType> T WaveAllBitOr(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitOr(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
+__generic<T : __BuiltinIntegerType> T WaveAllBitXor(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitXor(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WaveAllMax(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllMax(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WaveAllMin(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllMin(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WaveAllProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WaveAllSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
 
-__intrinsic_op bool WaveAllEqual(bool expr);
-__intrinsic_op bool WaveAllTrue(bool expr);
-__intrinsic_op bool WaveAnyTrue(bool expr);
+bool WaveAllEqual(bool expr);
+bool WaveAllTrue(bool expr);
+bool WaveAnyTrue(bool expr);
 
 uint64_t WaveBallot(bool expr);
 
@@ -1018,21 +1000,21 @@ bool WaveIsHelperLane();
 
 bool WaveOnce();
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WavePrefixProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WavePrefixProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
+__generic<T : __BuiltinArithmeticType> T WavePrefixSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WavePrefixSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
+__generic<T : __BuiltinType> T WaveReadFirstLane(T expr);
+__generic<T : __BuiltinType, let N : int> vector<T,N> WaveReadFirstLane(vector<T,N> expr);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
 
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
+__generic<T : __BuiltinType> T WaveReadLaneAt(T expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int> vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
 
 // `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
 typedef Texture2D texture2D;
@@ -1092,15 +1074,15 @@ for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
     sb << kBaseBufferAccessLevels[aa].name;
     sb << "Buffer {\n";
 
-    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+    sb << "void GetDimensions(out uint dim);\n";
 
     sb << "__target_intrinsic(glsl, \"texelFetch($$P, $0)$$z\")\n";
-    sb << "__intrinsic_op T Load(int location);\n";
+    sb << "T Load(int location);\n";
 
-    sb << "__intrinsic_op T Load(int location, out uint status);\n";
+    sb << "T Load(int location, out uint status);\n";
 
     sb << "__target_intrinsic(glsl, \"texelFetch($$P, int($0))$$z\")\n";
-    sb << "__intrinsic_op __subscript(uint index) -> T";
+    sb << "__subscript(uint index) -> T";
 
     if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
     {
diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h
index dfbdbe57b..c9ccfcc81 100644
--- a/source/slang/hlsl.meta.slang.h
+++ b/source/slang/hlsl.meta.slang.h
@@ -4,29 +4,29 @@ sb << "typedef uint UINT;\n";
 sb << "\n";
 sb << "__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer\n";
 sb << "{\n";
-sb << "    __intrinsic_op void Append(T value);\n";
+sb << "    void Append(T value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint numStructs,\n";
 sb << "        out uint stride);\n";
 sb << "};\n";
 sb << "\n";
 sb << "__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer\n";
 sb << "{\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint dim);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint Load(int location);\n";
-sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "    uint Load(int location);\n";
+sb << "    uint Load(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint2 Load2(int location);\n";
-sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "    uint2 Load2(int location);\n";
+sb << "    uint2 Load2(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint3 Load3(int location);\n";
-sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "    uint3 Load3(int location);\n";
+sb << "    uint3 Load3(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint4 Load4(int location);\n";
-sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "    uint4 Load4(int location);\n";
+sb << "    uint4 Load4(int location, out uint status);\n";
 sb << "};\n";
 sb << "\n";
 sb << "__generic<T>\n";
@@ -38,12 +38,12 @@ sb << "__intrinsic_type(";
 sb << ")\n";
 sb << "struct StructuredBuffer\n";
 sb << "{\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint numStructs,\n";
 sb << "        out uint stride);\n";
 sb << "\n";
-sb << "    __intrinsic_op T Load(int location);\n";
-sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "    T Load(int location);\n";
+sb << "    T Load(int location, out uint status);\n";
 sb << "\n";
 sb << "    __intrinsic_op(bufferLoad)\n";
 sb << "    __subscript(uint index) -> T;\n";
@@ -51,21 +51,21 @@ sb << "};\n";
 sb << "\n";
 sb << "__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer\n";
 sb << "{\n";
-sb << "    __intrinsic_op T Consume();\n";
+sb << "    T Consume();\n";
 sb << "\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint numStructs,\n";
 sb << "        out uint stride);\n";
 sb << "};\n";
 sb << "\n";
 sb << "__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch\n";
 sb << "{\n";
-sb << "    __intrinsic_op __subscript(uint index) -> T;\n";
+sb << "    __subscript(uint index) -> T;\n";
 sb << "};\n";
 sb << "\n";
 sb << "__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch\n";
 sb << "{\n";
-sb << "    __intrinsic_op __subscript(uint index) -> T { set; }\n";
+sb << "    __subscript(uint index) -> T { set; }\n";
 sb << "};\n";
 sb << "\n";
 sb << "__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer\n";
@@ -73,110 +73,110 @@ sb << "{\n";
 sb << "    // Note(tfoley): supports alll operations from `ByteAddressBuffer`\n";
 sb << "    // TODO(tfoley): can this be made a sub-type?\n";
 sb << "\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint dim);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint Load(int location);\n";
-sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "    uint Load(int location);\n";
+sb << "    uint Load(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint2 Load2(int location);\n";
-sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "    uint2 Load2(int location);\n";
+sb << "    uint2 Load2(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint3 Load3(int location);\n";
-sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "    uint3 Load3(int location);\n";
+sb << "    uint3 Load3(int location, out uint status);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint4 Load4(int location);\n";
-sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "    uint4 Load4(int location);\n";
+sb << "    uint4 Load4(int location, out uint status);\n";
 sb << "\n";
 sb << "    // Added operations:\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "    void InterlockedAdd(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "    void InterlockedAdd(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "    void InterlockedAnd(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "    void InterlockedAnd(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "    void InterlockedCompareExchange(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT compare_value,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "    void InterlockedCompareExchange(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT compare_value,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "    void InterlockedCompareStore(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT compare_value,\n";
 sb << "        UINT value);\n";
-sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "    void InterlockedCompareStore(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT compare_value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "    void InterlockedExchange(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "    void InterlockedExchange(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "    void InterlockedMax(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "    void InterlockedMax(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "    void InterlockedMin(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "    void InterlockedMin(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "    void InterlockedOr(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "    void InterlockedOr(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "    void InterlockedXor(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value,\n";
 sb << "        out UINT original_value);\n";
-sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "    void InterlockedXor(\n";
 sb << "        UINT dest,\n";
 sb << "        UINT value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void Store(\n";
+sb << "    void Store(\n";
 sb << "        uint address,\n";
 sb << "        uint value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void Store2(\n";
+sb << "    void Store2(\n";
 sb << "        uint address,\n";
 sb << "        uint2 value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void Store3(\n";
+sb << "    void Store3(\n";
 sb << "        uint address,\n";
 sb << "        uint3 value);\n";
 sb << "\n";
-sb << "    __intrinsic_op void Store4(\n";
+sb << "    void Store4(\n";
 sb << "        uint address,\n";
 sb << "        uint4 value);\n";
 sb << "};\n";
@@ -190,18 +190,17 @@ sb << "__intrinsic_type(";
 sb << ")\n";
 sb << "struct RWStructuredBuffer\n";
 sb << "{\n";
-sb << "    __intrinsic_op uint DecrementCounter();\n";
+sb << "    uint DecrementCounter();\n";
 sb << "\n";
-sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "    void GetDimensions(\n";
 sb << "        out uint numStructs,\n";
 sb << "        out uint stride);\n";
 sb << "\n";
-sb << "    __intrinsic_op uint IncrementCounter();\n";
+sb << "    uint IncrementCounter();\n";
 sb << "\n";
-sb << "    __intrinsic_op T Load(int location);\n";
-sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "    T Load(int location);\n";
+sb << "    T Load(int location, out uint status);\n";
 sb << "\n";
-sb << "\t__intrinsic_op\n";
 sb << "\t__subscript(uint index) -> T\n";
 sb << "\t{\n";
 sb << "\t\t__intrinsic_op(bufferLoad)\n";
@@ -233,99 +232,96 @@ sb << "\n";
 sb << "// Note(tfoley): Trying to systematically add all the HLSL builtins\n";
 sb << "\n";
 sb << "// Try to terminate the current draw or dispatch call (HLSL SM 4.0)\n";
-sb << "__intrinsic_op void abort();\n";
+sb << "void abort();\n";
 sb << "\n";
 sb << "// Absolute value (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> T abs(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> vector<T,N> abs(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> matrix<T,N,M> abs(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Inverse cosine (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T acos(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> acos(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> acos(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Test if all components are non-zero (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T all(T x);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> all(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> all(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinType> T all(T x);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> all(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> all(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Barrier for writes to all memory spaces (HLSL SM 5.0)\n";
-sb << "__intrinsic_op void AllMemoryBarrier();\n";
+sb << "void AllMemoryBarrier();\n";
 sb << "\n";
 sb << "// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)\n";
-sb << "__intrinsic_op void AllMemoryBarrierWithGroupSync();\n";
+sb << "void AllMemoryBarrierWithGroupSync();\n";
 sb << "\n";
 sb << "// Test if any components is non-zero (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T any(T x);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> any(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> any(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinType> T any(T x);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> any(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> any(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "\n";
 sb << "// Reinterpret bits as a double (HLSL SM 5.0)\n";
-sb << "__intrinsic_op double asdouble(uint lowbits, uint highbits);\n";
+sb << "double asdouble(uint lowbits, uint highbits);\n";
 sb << "\n";
 sb << "// Reinterpret bits as a float (HLSL SM 4.0)\n";
-sb << "__intrinsic_op float asfloat( int x);\n";
-sb << "__intrinsic_op float asfloat(uint x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);\n";
+sb << "float asfloat( int x);\n";
+sb << "float asfloat(uint x);\n";
+sb << "__generic<let N : int> vector<float,N> asfloat(vector< int,N> x);\n";
+sb << "__generic<let N : int> vector<float,N> asfloat(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<float,N,M> asfloat(matrix< int,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<float,N,M> asfloat(matrix<uint,N,M> x);\n";
 sb << "\n";
 sb << "\n";
 sb << "// Inverse sine (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T asin(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> asin(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> asin(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Reinterpret bits as an int (HLSL SM 4.0)\n";
-sb << "__intrinsic_op int asint(float x);\n";
-sb << "__intrinsic_op int asint(uint x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);\n";
+sb << "int asint(float x);\n";
+sb << "int asint(uint x);\n";
+sb << "__generic<let N : int> vector<int,N> asint(vector<float,N> x);\n";
+sb << "__generic<let N : int> vector<int,N> asint(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<int,N,M> asint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<int,N,M> asint(matrix<uint,N,M> x);\n";
 sb << "\n";
 sb << "// Reinterpret bits of double as a uint (HLSL SM 5.0)\n";
-sb << "__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);\n";
+sb << "void asuint(double value, out uint lowbits, out uint highbits);\n";
 sb << "\n";
 sb << "// Reinterpret bits as a uint (HLSL SM 4.0)\n";
-sb << "__intrinsic_op uint asuint(float x);\n";
-sb << "__intrinsic_op uint asuint(int x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);\n";
+sb << "uint asuint(float x);\n";
+sb << "uint asuint(int x);\n";
+sb << "__generic<let N : int> vector<uint,N> asuint(vector<float,N> x);\n";
+sb << "__generic<let N : int> vector<uint,N> asuint(vector<int,N> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<uint,N,M> asuint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> matrix<uint,N,M> asuint(matrix<int,N,M> x);\n";
 sb << "\n";
 sb << "// Inverse tangent (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T atan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> atan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> atan(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
-sb << "__intrinsic_op\n";
 sb << "T atan2(T y, T x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> atan2(vector<T,N> y, vector<T,N> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Ceiling (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T ceil(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> ceil(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> ceil(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "\n";
 sb << "// Check access status to tiled resource\n";
-sb << "__intrinsic_op bool CheckAccessFullyMapped(uint status);\n";
+sb << "bool CheckAccessFullyMapped(uint status);\n";
 sb << "\n";
 sb << "// Clamp (HLSL SM 1.0)\n";
 sb << "__generic<T : __BuiltinArithmeticType> T clamp(T x, T min, T max);\n";
@@ -333,9 +329,9 @@ sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> clamp(vec
 sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);\n";
 sb << "\n";
 sb << "// Clip (discard) fragment conditionally\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> void clip(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> void clip(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> void clip(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Cosine\n";
 sb << "__generic<T : __BuiltinFloatingPointType> T cos(T x);\n";
@@ -343,360 +339,348 @@ sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> cos(ve
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> cos(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Hyperbolic cosine\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T cosh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> cosh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> cosh(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Population count\n";
-sb << "__intrinsic_op uint countbits(uint value);\n";
+sb << "uint countbits(uint value);\n";
 sb << "\n";
 sb << "// Cross product\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType> vector<T,3> cross(vector<T,3> x, vector<T,3> y);\n";
 sb << "\n";
 sb << "// Convert encoded color\n";
-sb << "__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);\n";
+sb << "int4 D3DCOLORtoUBYTE4(float4 x);\n";
 sb << "\n";
 sb << "// Partial-difference derivatives\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__target_intrinsic(glsl, dFdx)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddx(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__target_intrinsic(glsl, dFdx)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddx(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__target_intrinsic(glsl, dFdx)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> ddx(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddx_coarse(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddx_coarse(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxFine)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddx_fine(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxFine)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddx_fine(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdxFine)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> ddx_fine(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__target_intrinsic(glsl, dFdy)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddy(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__target_intrinsic(glsl, dFdy)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddy(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__target_intrinsic(glsl, dFdy)\n";
-sb << "__intrinsic_op\n";
 sb << " matrix<T,N,M> ddy(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddy_coarse(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddy_coarse(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyFine)\n";
-sb << "__intrinsic_op\n";
 sb << "T ddy_fine(T x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyFine)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> ddy_fine(vector<T,N> x);\n";
+sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__glsl_extension(GL_ARB_derivative_control)\n";
 sb << "__target_intrinsic(glsl, dFdyFine)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> ddy_fine(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "\n";
 sb << "// Radians to degrees\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T degrees(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> degrees(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> degrees(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Matrix determinant\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> T determinant(matrix<T,N,N> m);\n";
 sb << "\n";
 sb << "// Barrier for device memory\n";
-sb << "__intrinsic_op void DeviceMemoryBarrier();\n";
-sb << "__intrinsic_op void DeviceMemoryBarrierWithGroupSync();\n";
+sb << "void DeviceMemoryBarrier();\n";
+sb << "void DeviceMemoryBarrierWithGroupSync();\n";
 sb << "\n";
 sb << "// Vector distance\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> T distance(vector<T,N> x, vector<T,N> y);\n";
 sb << "\n";
 sb << "// Vector dot product\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> T dot(vector<T,N> x, vector<T,N> y);\n";
 sb << "\n";
 sb << "// Helper for computing distance terms for lighting (obsolete)\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> vector<T,4> dst(vector<T,4> x, vector<T,4> y);\n";
 sb << "\n";
 sb << "// Error message\n";
 sb << "\n";
-sb << "// __intrinsic_op void errorf( string format, ... );\n";
+sb << "// void errorf( string format, ... );\n";
 sb << "\n";
 sb << "// Attribute evaluation\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T EvaluateAttributeAtCentroid(T x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T EvaluateAttributeAtSample(T x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T EvaluateAttributeSnapped(T x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);\n";
 sb << "\n";
 sb << "// Base-e exponent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T exp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> exp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> exp(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Base-2 exponent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T exp2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> exp2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> exp2(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Convert 16-bit float stored in low bits of integer\n";
-sb << "__intrinsic_op float f16tof32(uint value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);\n";
+sb << "float f16tof32(uint value);\n";
+sb << "__generic<let N : int> vector<float,N> f16tof32(vector<uint,N> value);\n";
 sb << "\n";
 sb << "// Convert to 16-bit float stored in low bits of integer\n";
-sb << "__intrinsic_op uint f32tof16(float value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);\n";
+sb << "uint f32tof16(float value);\n";
+sb << "__generic<let N : int> vector<uint,N> f32tof16(vector<float,N> value);\n";
 sb << "\n";
 sb << "// Flip surface normal to face forward, if needed\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);\n";
 sb << "\n";
 sb << "// Find first set bit starting at high bit and working down\n";
-sb << "__intrinsic_op int firstbithigh(int value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);\n";
+sb << "int firstbithigh(int value);\n";
+sb << "__generic<let N : int> vector<int,N> firstbithigh(vector<int,N> value);\n";
 sb << "\n";
-sb << "__intrinsic_op uint firstbithigh(uint value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);\n";
+sb << "uint firstbithigh(uint value);\n";
+sb << "__generic<let N : int> vector<uint,N> firstbithigh(vector<uint,N> value);\n";
 sb << "\n";
 sb << "// Find first set bit starting at low bit and working up\n";
-sb << "__intrinsic_op int firstbitlow(int value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);\n";
+sb << "int firstbitlow(int value);\n";
+sb << "__generic<let N : int> vector<int,N> firstbitlow(vector<int,N> value);\n";
 sb << "\n";
-sb << "__intrinsic_op uint firstbitlow(uint value);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);\n";
+sb << "uint firstbitlow(uint value);\n";
+sb << "__generic<let N : int> vector<uint,N> firstbitlow(vector<uint,N> value);\n";
 sb << "\n";
 sb << "// Floor (HLSL SM 1.0)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T floor(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> floor(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> floor(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Fused multiply-add for doubles\n";
-sb << "__intrinsic_op double fma(double a, double b, double c);\n";
-sb << "__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);\n";
-sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);\n";
+sb << "double fma(double a, double b, double c);\n";
+sb << "__generic<let N : int> vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);\n";
+sb << "__generic<let N : int, let M : int> matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);\n";
 sb << "\n";
 sb << "// Floating point remainder of x/y\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T fmod(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> fmod(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);\n";
 sb << "\n";
 sb << "// Fractional part\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__target_intrinsic(glsl, fract)\n";
-sb << "__intrinsic_op\n";
 sb << "T frac(T x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__target_intrinsic(glsl, fract)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> frac(vector<T,N> x);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__target_intrinsic(glsl, fract)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> frac(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Split float into mantissa and exponent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T frexp(T x, out T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);\n";
 sb << "\n";
 sb << "// Texture filter width\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T fwidth(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> fwidth(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> fwidth(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Get number of samples in render target\n";
-sb << "__intrinsic_op uint GetRenderTargetSampleCount();\n";
+sb << "uint GetRenderTargetSampleCount();\n";
 sb << "\n";
 sb << "// Get position of given sample\n";
-sb << "__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);\n";
+sb << "float2 GetRenderTargetSamplePosition(int Index);\n";
 sb << "\n";
 sb << "// Group memory barrier\n";
-sb << "__intrinsic_op void GroupMemoryBarrier();\n";
-sb << "__intrinsic_op void GroupMemoryBarrierWithGroupSync();\n";
+sb << "void GroupMemoryBarrier();\n";
+sb << "void GroupMemoryBarrierWithGroupSync();\n";
 sb << "\n";
 sb << "// Atomics\n";
-sb << "__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedAdd(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedAdd(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedAnd(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedAnd(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);\n";
+sb << "void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);\n";
+sb << "void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);\n";
-sb << "__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);\n";
+sb << "void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);\n";
+sb << "void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedExchange(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedExchange(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedMax(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedMax(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedMin(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedMin(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedOr(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedOr(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
-sb << "__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);\n";
-sb << "__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);\n";
+sb << "void InterlockedXor(in out  int dest,  int value, out  int original_value);\n";
+sb << "void InterlockedXor(in out uint dest, uint value, out uint original_value);\n";
 sb << "\n";
 sb << "// Is floating-point value finite?\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> bool isfinite(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isfinite(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isfinite(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Is floating-point value infinite?\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> bool isinf(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isinf(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isinf(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Is floating-point value not-a-number?\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> bool isnan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<bool,N> isnan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<bool,N,M> isnan(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Construct float from mantissa and exponent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T ldexp(T x, T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);\n";
 sb << "\n";
 sb << "// Vector length\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> T length(vector<T,N> x);\n";
 sb << "\n";
 sb << "// Linear interpolation\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
 sb << "__target_intrinsic(glsl, mix)\n";
-sb << "__intrinsic_op\n";
 sb << "T lerp(T x, T y, T s);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
 sb << "__target_intrinsic(glsl, mix)\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);\n";
 sb << "\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
 sb << "__target_intrinsic(glsl, mix)\n";
-sb << "__intrinsic_op\n";
 sb << "matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);\n";
 sb << "\n";
 sb << "// Legacy lighting function (obsolete)\n";
-sb << "__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);\n";
+sb << "float4 lit(float n_dot_l, float n_dot_h, float m);\n";
 sb << "\n";
 sb << "// Base-e logarithm\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T log(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Base-10 logarithm\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T log10(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log10(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log10(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Base-2 logarithm\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T log2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> log2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> log2(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// multiply-add\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T mad(T mvalue, T avalue, T bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);\n";
 sb << "\n";
 sb << "// maximum\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T max(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> max(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);\n";
 sb << "\n";
 sb << "// minimum\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T min(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> min(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);\n";
 sb << "\n";
 sb << "// split into integer and fractional parts (both with same sign)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T modf(T x, out T ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);\n";
 sb << "\n";
 sb << "// msad4 (whatever that is)\n";
-sb << "__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);\n";
+sb << "uint4 msad4(uint reference, uint2 source, uint4 accum);\n";
 sb << "\n";
 sb << "// General inner products\n";
 sb << "\n";
 sb << "// scalar-scalar\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T mul(T x, T y);\n";
 sb << "\n";
 sb << "// scalar-vector and vector-scalar\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(T x, vector<T,N> y);\n";
 sb << "\n";
 sb << "// scalar-matrix and matrix-scalar\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(T x, matrix<T,N,M> y);\n";
 sb << "\n";
 sb << "// vector-vector (dot product)\n";
 sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);\n";
@@ -711,86 +695,86 @@ sb << "// matrix-matrix\n";
 sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);\n";
 sb << "\n";
 sb << "// noise (deprecated)\n";
-sb << "__intrinsic_op float noise(float x);\n";
-sb << "__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);\n";
+sb << "float noise(float x);\n";
+sb << "__generic<let N : int> float noise(vector<float, N> x);\n";
 sb << "\n";
 sb << "// Normalize a vector\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> normalize(vector<T,N> x);\n";
 sb << "\n";
 sb << "// Raise to a power\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T pow(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> pow(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);\n";
 sb << "\n";
 sb << "// Output message\n";
 sb << "\n";
-sb << "// __intrinsic_op void printf( string format, ... );\n";
+sb << "// void printf( string format, ... );\n";
 sb << "\n";
 sb << "// Tessellation factor fixup routines\n";
 sb << "\n";
-sb << "__intrinsic_op void Process2DQuadTessFactorsAvg(\n";
+sb << "void Process2DQuadTessFactorsAvg(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float2 InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void Process2DQuadTessFactorsMax(\n";
+sb << "void Process2DQuadTessFactorsMax(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float2 InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void Process2DQuadTessFactorsMin(\n";
+sb << "void Process2DQuadTessFactorsMin(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float2 InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessIsolineTessFactors(\n";
+sb << "void ProcessIsolineTessFactors(\n";
 sb << "    in  float RawDetailFactor,\n";
 sb << "    in  float RawDensityFactor,\n";
 sb << "    out float RoundedDetailFactor,\n";
 sb << "    out float RoundedDensityFactor);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessQuadTessFactorsAvg(\n";
+sb << "void ProcessQuadTessFactorsAvg(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessQuadTessFactorsMax(\n";
+sb << "void ProcessQuadTessFactorsMax(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessQuadTessFactorsMin(\n";
+sb << "void ProcessQuadTessFactorsMin(\n";
 sb << "    in  float4 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float4 RoundedEdgeTessFactors,\n";
 sb << "    out float2 RoundedInsideTessFactors,\n";
 sb << "    out float2 UnroundedInsideTessFactors);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessTriTessFactorsAvg(\n";
+sb << "void ProcessTriTessFactorsAvg(\n";
 sb << "    in  float3 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float3 RoundedEdgeTessFactors,\n";
 sb << "    out float RoundedInsideTessFactor,\n";
 sb << "    out float UnroundedInsideTessFactor);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessTriTessFactorsMax(\n";
+sb << "void ProcessTriTessFactorsMax(\n";
 sb << "    in  float3 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float3 RoundedEdgeTessFactors,\n";
 sb << "    out float RoundedInsideTessFactor,\n";
 sb << "    out float UnroundedInsideTessFactor);\n";
 sb << "\n";
-sb << "__intrinsic_op void ProcessTriTessFactorsMin(\n";
+sb << "void ProcessTriTessFactorsMin(\n";
 sb << "    in  float3 RawEdgeFactors,\n";
 sb << "    in  float InsideScale,\n";
 sb << "    out float3 RoundedEdgeTessFactors,\n";
@@ -798,38 +782,36 @@ sb << "    out float RoundedInsideTessFactors,\n";
 sb << "    out float UnroundedInsideTessFactors);\n";
 sb << "\n";
 sb << "// Degrees to radians\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T radians(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> radians(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> radians(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Approximate reciprocal\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T rcp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> rcp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> rcp(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Reflect incident vector across plane with given normal\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> reflect(vector<T,N> i, vector<T,N> n);\n";
 sb << "\n";
 sb << "// Refract incident vector given surface normal and index of refraction\n";
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
-sb << "__intrinsic_op\n";
 sb << "vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);\n";
 sb << "\n";
 sb << "// Reverse order of bits\n";
-sb << "__intrinsic_op uint reversebits(uint value);\n";
+sb << "uint reversebits(uint value);\n";
 sb << "__generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value);\n";
 sb << "\n";
 sb << "// Round-to-nearest\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T round(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> round(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> round(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Reciprocal of square root\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T rsqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> rsqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> rsqrt(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Clamp value to [0,1] range\n";
 sb << "__generic<T : __BuiltinFloatingPointType>\n";
@@ -877,9 +859,9 @@ sb << "}\n";
 sb << "\n";
 sb << "\n";
 sb << "// Extract sign of value\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> int sign(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> vector<int,N> sign(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> matrix<int,N,M> sign(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "\n";
 sb << "// Sine\n";
@@ -888,127 +870,127 @@ sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sin(ve
 sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sin(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Sine and cosine\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(T x, out T s, out T c);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> void sincos(T x, out T s, out T c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);\n";
 sb << "\n";
 sb << "// Hyperbolic Sine\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T sinh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sinh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sinh(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Smooth step (Hermite interpolation)\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T smoothstep(T min, T max, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Square root\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T sqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> sqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> sqrt(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Step function\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T step(T y, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> step(vector<T,N> y, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Tangent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T tan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> tan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> tan(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Hyperbolic tangent\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T tanh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> tanh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> tanh(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Legacy texture-fetch operations\n";
 sb << "\n";
 sb << "/*\n";
-sb << "__intrinsic_op float4 tex1D(sampler1D s, float t);\n";
-sb << "__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);\n";
-sb << "__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);\n";
-sb << "__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);\n";
-sb << "\n";
-sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t);\n";
-sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
-sb << "__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
-sb << "__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);\n";
-sb << "\n";
-sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t);\n";
-sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
-sb << "__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
-sb << "__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);\n";
-sb << "__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);\n";
-sb << "\n";
-sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);\n";
-sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
-sb << "__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);\n";
-sb << "__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
-sb << "__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);\n";
-sb << "__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);\n";
+sb << "float4 tex1D(sampler1D s, float t);\n";
+sb << "float4 tex1D(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "float4 tex1Dbias(sampler1D s, float4 t);\n";
+sb << "float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "float4 tex1Dlod(sampler1D s, float4 t);\n";
+sb << "float4 tex1Dproj(sampler1D s, float4 t);\n";
+sb << "\n";
+sb << "float4 tex2D(sampler2D s, float2 t);\n";
+sb << "float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "float4 tex2Dbias(sampler2D s, float4 t);\n";
+sb << "float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "float4 tex2Dlod(sampler2D s, float4 t);\n";
+sb << "float4 tex2Dproj(sampler2D s, float4 t);\n";
+sb << "\n";
+sb << "float4 tex3D(sampler3D s, float3 t);\n";
+sb << "float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "float4 tex3Dbias(sampler3D s, float4 t);\n";
+sb << "float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "float4 tex3Dlod(sampler3D s, float4 t);\n";
+sb << "float4 tex3Dproj(sampler3D s, float4 t);\n";
+sb << "\n";
+sb << "float4 texCUBE(samplerCUBE s, float3 t);\n";
+sb << "float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "float4 texCUBEbias(samplerCUBE s, float4 t);\n";
+sb << "float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "float4 texCUBElod(samplerCUBE s, float4 t);\n";
+sb << "float4 texCUBEproj(samplerCUBE s, float4 t);\n";
 sb << "*/\n";
 sb << "\n";
 sb << "// Matrix transpose\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,M,N> transpose(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Truncate to integer\n";
-sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);\n";
-sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType> T trunc(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> trunc(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> matrix<T,N,M> trunc(matrix<T,N,M> x);\n";
 sb << "\n";
 sb << "// Shader model 6.0 stuff\n";
 sb << "\n";
-sb << "__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);\n";
+sb << "uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType> T QuadReadLaneAt(T sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);\n";
+sb << "__generic<T : __BuiltinType> T QuadSwapX(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> QuadSwapX(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);\n";
+sb << "__generic<T : __BuiltinType> T QuadSwapY(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> QuadSwapY(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType> T WaveAllBitAnd(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitAnd(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType> T WaveAllBitOr(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitOr(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType> T WaveAllBitXor(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> vector<T,N> WaveAllBitXor(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WaveAllMax(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllMax(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WaveAllMin(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllMin(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WaveAllProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WaveAllSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WaveAllSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__intrinsic_op bool WaveAllEqual(bool expr);\n";
-sb << "__intrinsic_op bool WaveAllTrue(bool expr);\n";
-sb << "__intrinsic_op bool WaveAnyTrue(bool expr);\n";
+sb << "bool WaveAllEqual(bool expr);\n";
+sb << "bool WaveAllTrue(bool expr);\n";
+sb << "bool WaveAnyTrue(bool expr);\n";
 sb << "\n";
 sb << "uint64_t WaveBallot(bool expr);\n";
 sb << "\n";
@@ -1020,21 +1002,21 @@ sb << "bool WaveIsHelperLane();\n";
 sb << "\n";
 sb << "bool WaveOnce();\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WavePrefixProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WavePrefixProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType> T WavePrefixSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> WavePrefixSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);\n";
+sb << "__generic<T : __BuiltinType> T WaveReadFirstLane(T expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> WaveReadFirstLane(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);\n";
 sb << "\n";
-sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);\n";
-sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);\n";
-sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType> T WaveReadLaneAt(T expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int> vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);\n";
 sb << "\n";
 sb << "// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points\n";
 sb << "typedef Texture2D texture2D;\n";
@@ -1095,15 +1077,15 @@ for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
     sb << kBaseBufferAccessLevels[aa].name;
     sb << "Buffer {\n";
 
-    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+    sb << "void GetDimensions(out uint dim);\n";
 
     sb << "__target_intrinsic(glsl, \"texelFetch($P, $0)$z\")\n";
-    sb << "__intrinsic_op T Load(int location);\n";
+    sb << "T Load(int location);\n";
 
-    sb << "__intrinsic_op T Load(int location, out uint status);\n";
+    sb << "T Load(int location, out uint status);\n";
 
     sb << "__target_intrinsic(glsl, \"texelFetch($P, int($0))$z\")\n";
-    sb << "__intrinsic_op __subscript(uint index) -> T";
+    sb << "__subscript(uint index) -> T";
 
     if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
     {
-- 
cgit v1.2.3


From 9640df03814593d2f4b34c36bbec6756b1ed7fba Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 08:31:06 -0800
Subject: Handle "ThisType" substitutions when specializing generics in the IR

The original code is handling the issue where a call site might be specializing a generic function, so it has a `DeclRef` that represents what it wants to specialize, but the callee is actually a different overload of the same generic function (e.g., a target-specific overload) and so we need to construct a set of substitutions that are equivalent (same arguments), but point to different `GenericDecl`s.

That code was making some bad assumptions, though:

1. It assumed that the substitutions list would always start with a generic substitution (no longer true with `ThisTypeSubstitution`).
2. It assumed that only the top-most substitution would need to be translated. This assumption is probably safe for now, but it could break down if we ever introduced an ability for a type to be re-opened to introduce new (target-specific) overloads of its members.

The new approach goes ahead and does a deep copy of the substitution list (but a shallow copy of the arguments), and only copies the generic substitutions for now.
---
 source/slang/ir.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 76 insertions(+), 4 deletions(-)

(limited to 'source')

diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index d6a01a484..439bc7797 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -3866,6 +3866,78 @@ namespace Slang
         return originalType->Substitute(subst).As<Type>();
     }
 
+    // Given a list of substitutions, return the inner-most
+    // generic substitution in the list, or NULL if there
+    // are no generic substitutions.
+    RefPtr<GenericSubstitution> getInnermostGenericSubst(
+        Substitutions*  inSubst)
+    {
+        auto subst = inSubst;
+        while( subst )
+        {
+            GenericSubstitution* genericSubst = dynamic_cast<GenericSubstitution*>(subst);
+            if(genericSubst)
+                return genericSubst;
+
+            subst = subst->outer;
+        }
+        return nullptr;
+    }
+
+    RefPtr<GenericDecl> getInnermostGenericDecl(
+        Decl*   inDecl)
+    {
+        auto decl = inDecl;
+        while( decl )
+        {
+            GenericDecl* genericDecl = dynamic_cast<GenericDecl*>(decl);
+            if(genericDecl)
+                return genericDecl;
+
+            decl = decl->ParentDecl;
+        }
+        return nullptr;
+    }
+
+    // This function takes a list of substitutions that we'd
+    // like to apply, but which (1) might apply to a different
+    // declaration in cases where we have got target-specific
+    // overloads in the mix, and (2) might include some `ThisType`
+    // substitutions, which we don't care about in this context,
+    // and produces a new set of substitutions without these
+    // two issues.
+    RefPtr<Substitutions> cloneSubstitutionsForSpecialization(
+        IRSharedGenericSpecContext* sharedContext,
+        Substitutions*              oldSubst,
+        Decl*                       newDecl)
+    {
+        // We will "peel back" layers of substitutions until
+        // we find our first generic subsitution.
+        auto oldGenericSubst = getInnermostGenericSubst(oldSubst);
+        if(!oldGenericSubst)
+            return nullptr;
+
+        // We will also peel back layers of declarations until
+        // we find our first generic decl.
+        auto newGenericDecl = getInnermostGenericDecl(newDecl);
+        if( !newGenericDecl )
+        {
+//            SLANG_UNEXPECTED("generic subst without generic decl");
+            return nullptr;
+        }
+
+        RefPtr<GenericSubstitution> newSubst = new GenericSubstitution();
+        newSubst->genericDecl = newGenericDecl;
+        newSubst->args = oldGenericSubst->args;
+
+        newSubst->outer = cloneSubstitutionsForSpecialization(
+            sharedContext,
+            oldGenericSubst->outer,
+            newGenericDecl->ParentDecl);
+
+        return newSubst;
+    }
+
 
     IRFunc* getSpecializedFunc(
         IRSharedGenericSpecContext* sharedContext,
@@ -3897,10 +3969,10 @@ namespace Slang
         // using a different overload of a target-specific function,
         // so we need to create a dummy substitution here, to make
         // sure it used the correct generic.
-        RefPtr<GenericSubstitution> newSubst = new GenericSubstitution();
-        newSubst->genericDecl = genericFunc->genericDecl;
-        auto specDeclRefSubst = specDeclRef.substitutions.As<GenericSubstitution>();
-        newSubst->args = specDeclRefSubst->args;
+        RefPtr<Substitutions> newSubst = cloneSubstitutionsForSpecialization(
+            sharedContext,
+            specDeclRef.substitutions,
+            genericFunc->genericDecl);
 
         IRGenericSpecContext context;
         context.shared = sharedContext;
-- 
cgit v1.2.3


From 93a444fe1b5f1e3c6da67db4d948df53a0bdb3f6 Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 09:38:32 -0800
Subject: Attach correct types to subscript accessors

Subscript declarations can have nested "accessor" declarations for the get/set behavior:

```
__subscript(int index) -> float
{
    get { ... }
    set { ... }
}
```

The AST type checks an expression like `a[i]` into a call to an appropriate `__subscript` declaration, and reads the return type off of that, but doesn't drill down to the individual getters/setters.

During IR code generation, we need to resolve a call to the subscript operation down to the actual getter or setter, since those are what will have the executable code (or be intrinsics). If we have a non-intrinsic accessor, then we end up asking for its "return type" and get NULL, which crashes the compiler.

The fix in this case is to add a bit more semantic checking for accessors, mostly just so that we can have them copy the return type from their parent declaration. While we are at it, this change goes ahead and has an accessor validate that the parent declaration is one that should be allowed, and emit a diagnostic if it is nested in an improper place.
---
 source/slang/check.cpp         | 22 ++++++++++++++++++++++
 source/slang/diagnostic-defs.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'source')

diff --git a/source/slang/check.cpp b/source/slang/check.cpp
index ed2ed4a1b..6bb7c232f 100644
--- a/source/slang/check.cpp
+++ b/source/slang/check.cpp
@@ -3471,11 +3471,33 @@ namespace Slang
 
             decl->SetCheckState(DeclCheckState::CheckedHeader);
 
+            for(auto mm : decl->Members)
+            {
+                checkDecl(mm);
+            }
+
             decl->SetCheckState(DeclCheckState::Checked);
         }
 
         void visitAccessorDecl(AccessorDecl* decl)
         {
+            // An accessor must appear nested inside a subscript declaration (today),
+            // or a property declaration (when we add them). It will derive
+            // its return type from the outer declaration, so we handle both
+            // of these checks at the same place.
+            auto parent = decl->ParentDecl;
+            if(auto parentSubscript = dynamic_cast<SubscriptDecl*>(parent))
+            {
+                decl->ReturnType = parentSubscript->ReturnType;
+            }
+            // TODO: when we add "property" declarations, check for them here
+            else
+            {
+                getSink()->diagnose(decl, Diagnostics::accessorMustBeInsideSubscriptOrProperty);
+            }
+
+            decl->SetCheckState(DeclCheckState::CheckedHeader);
+
             // TODO: check the body!
 
             decl->SetCheckState(DeclCheckState::Checked);
diff --git a/source/slang/diagnostic-defs.h b/source/slang/diagnostic-defs.h
index 52f5d48a0..10b2dbd1e 100644
--- a/source/slang/diagnostic-defs.h
+++ b/source/slang/diagnostic-defs.h
@@ -246,6 +246,7 @@ DIAGNOSTIC(38003, Error, entryPointSymbolNotAFunction, "entry point '$0' must be
 DIAGNOSTIC(38100, Error, typeDoesntImplementInterfaceRequirement, "type '$0' does not provide required interface member '$1'")
 DIAGNOSTIC(38101, Error, thisExpressionOutsideOfTypeDecl, "'this' expression can only be used in members of an aggregate type")
 DIAGNOSTIC(38102, Error, initializerNotInsideType, "an 'init' declaration is only allowed inside a type or 'extension' declaration")
+DIAGNOSTIC(38103, Error, accessorMustBeInsideSubscriptOrProperty, "an accessor declaration is only allowed inside a subscript or property declaration")
 
 //
 // 4xxxx - IL code generation.
-- 
cgit v1.2.3


From 5c220292d6ac2674942bb5f1bb09fe1817151c11 Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 09:25:41 -0800
Subject: Fixes for name mangling/demangling

The source of a lot of these changes is that our current strategy for dealing with "builtin" operations when emitting HLSL from the IR is to de-mangle the mangled name of an operation, and then emit HLSL code for a function call to an operation with that de-mangled name.

This change introduces a few fixups for that work:

- It adds support for parsing the mangled names of generics (specialized and unspecialized)

- It adds logic for detecting when the operation being invoked is a member function
  - This is currently a bit ugly, since we compare the number of actual arguments we have in the IR against the number of parameters declared for the callee, and if they don't match we assume we have an extra `this` argument.

On the mangling side, we add (hacky) support for mangling a function name when its types involve generic parameters, e.g.:

```
__generic<T, let N : int> T length(vector<T,N> v);
```

In this case the mangled name of the function needs to include a mangling for the type `vector<T,N>` which means it also needs to include a mangling for `N`.

The reason I describe this support as "hacky" is because we really shouldn't be reproducing the names `T` or `N` in the mangled symbol name. By doing so we make it so that a user changing the name of a generic parameter would break (IR) binary compatibility with existing code that was separately compiled.
I've included comments in the code about a better way to handle this, but it isn't a priority right now since binary compatibility isn't something meaningful until we start emitting usable bytecode modules.
---
 source/slang/emit.cpp   | 144 +++++++++++++++++++++++++++++++++++++++++++++---
 source/slang/mangle.cpp |  12 ++++
 2 files changed, 147 insertions(+), 9 deletions(-)

(limited to 'source')

diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index d95946204..7e4fca4e5 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -4677,7 +4677,7 @@ emitDeclImpl(decl, nullptr);
             if(c == '0')
                 return 0;
 
-            int count = 0;
+            UInt count = 0;
             for(;;)
             {
                 count = count*10 + c - '0';
@@ -4689,18 +4689,117 @@ emitDeclImpl(decl, nullptr);
             }
         }
 
+        void readGenericParam()
+        {
+            switch(peek())
+            {
+            case 'T':
+                get();
+                break;
+
+            default:
+                SLANG_UNEXPECTED("bad name mangling");
+                break;
+            }
+        }
+
+        void readGenericParams()
+        {
+            expect("g");
+            UInt paramCount = readCount();
+            for(UInt pp = 0; pp < paramCount; pp++)
+            {
+                readGenericParam();
+            }
+        }
+
+        void readSimpleIntVal()
+        {
+            int c = peek();
+            if(isDigit(c))
+            {
+                get();
+            }
+            else
+            {
+                readVal();
+            }
+        }
+
+        void readType()
+        {
+            int c = peek();
+            switch(c)
+            {
+            case 'V':
+            case 'b':
+            case 'i':
+            case 'u':
+            case 'U':
+            case 'h':
+            case 'f':
+            case 'd':
+                get();
+                break;
+
+            case 'v':
+                get();
+                readSimpleIntVal();
+                readType();
+                break;
+
+            default:
+                // TODO: need to read a named type
+                // here...
+                break;
+            }
+        }
+
+        void readVal()
+        {
+            // TODO: handle other cases here
+            readType();
+        }
+
+        void readGenericArg()
+        {
+            readVal();
+        }
+
+        void readGenericArgs()
+        {
+            expect("G");
+            UInt argCount = readCount();
+            for(UInt aa = 0; aa < argCount; aa++)
+            {
+                readGenericArg();
+            }
+        }
+
         UnownedStringSlice readSimpleName()
         {
             UnownedStringSlice result;
             for(;;)
             {
                 int c = peek();
+
+                if(c == 'g')
+                {
+                    readGenericParams();
+                    continue;
+                }
+                else if(c == 'G')
+                {
+                    readGenericArgs();
+                    continue;
+                }
+
                 if(!isDigit((char)c))
                     return result;
 
                 // Read the length part
-                int count = readCount();
-                if(count > (end_ - cursor_))
+                UInt count = readCount();
+                if(count > UInt(end_ - cursor_))
                 {
                     SLANG_UNEXPECTED("bad name mangling");
                     UNREACHABLE_RETURN(result);
@@ -4710,6 +4809,12 @@ emitDeclImpl(decl, nullptr);
                 cursor_ += count;
             }
         }
+
+        UInt readParamCount()
+        {
+            expect("p");
+            return readCount();
+        }
     };
 
     void emitIntrinsicCallExpr(
@@ -4726,16 +4831,37 @@ emitDeclImpl(decl, nullptr);
 
         auto name = um.readSimpleName();
 
-        // TODO: need to detect if name represents
-        // a member function, etc.
+        // The mangled function name currently records
+        // the number of explicit parameters, and thus
+        // doesn't include the implicit `this` parameter.
+        // We can compare the argument and parameter counts
+        // to figure out whether we have a member function call.
+        UInt paramCount = um.readParamCount();
+
+        // For a call with N arguments, the instruction will
+        // have N+1 operands.
+        UInt operandCount = inst->getArgCount();
+        UInt argCount = operandCount - 1;
+        UInt operandIndex = 1;
+
+        if(argCount != paramCount)
+        {
+            // Looks like a member function call
+            emit("(");
+            emitIROperand(ctx, inst->getArg(operandIndex));
+            emit(").");
+
+            operandIndex++;
+        }
 
         emit(name);
         emit("(");
-        UInt argCount = inst->getArgCount();
-        for( UInt aa = 1; aa < argCount; ++aa )
+        bool first = true;
+        for(; operandIndex < operandCount; ++operandIndex )
         {
-            if(aa != 1) emit(", ");
-            emitIROperand(ctx, inst->getArg(aa));
+            if(!first) emit(", ");
+            emitIROperand(ctx, inst->getArg(operandIndex));
+            first = false;
         }
         emit(")");
     }
diff --git a/source/slang/mangle.cpp b/source/slang/mangle.cpp
index b9fba6380..dca48f671 100644
--- a/source/slang/mangle.cpp
+++ b/source/slang/mangle.cpp
@@ -152,6 +152,18 @@ namespace Slang
             // value, so we certainly don't want to include
             // it in the mangling.
         }
+        else if( auto genericParamIntVal = dynamic_cast<GenericParamIntVal*>(val) )
+        {
+            // TODO: we shouldn't be including the names of generic parameters
+            // anywhere in mangled names, since changing parameter names
+            // shouldn't break binary compatibility.
+            //
+            // The right solution in the long term is for generic parameters
+            // (both types and values) to be mangled in terms of their
+            // "depth" (how many outer generics) and "index" (which
+            // parameter are they at the specified depth).
+            emitName(context, genericParamIntVal->declRef.GetName());
+        }
         else
         {
             SLANG_UNEXPECTED("unimplemented case in mangling");
-- 
cgit v1.2.3


From 722105feb1f11ea727af52bc5a484ddf4320e74d Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 09:45:08 -0800
Subject: Add a comparison operator to UnownedStringSlice

This is to allow me to compare for particular names in my de-mangling logic in `emit.cpp`.
---
 source/core/slang-string.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'source')

diff --git a/source/core/slang-string.h b/source/core/slang-string.h
index 0808d9715..a0f2b48a1 100644
--- a/source/core/slang-string.h
+++ b/source/core/slang-string.h
@@ -154,6 +154,22 @@ namespace Slang
             return endData;
         }
 
+        UInt size() const // Number of bytes spanned by [beginData, endData).
+        {
+            return endData - beginData;
+        }
+        // Byte-wise equality: true when both slices have the same length and contents.
+        bool operator==(UnownedStringSlice const& other) const
+        {
+            return size() == other.size()
+                && memcmp(beginData, other.beginData, size()) == 0;
+        }
+        // Equality against a NUL-terminated C string (the terminator is not compared).
+        bool operator==(char const* str) const
+        {
+            return (*this) == UnownedStringSlice(str, str + strlen(str));
+        }
+
     private:
         char const* beginData;
         char const* endData;
-- 
cgit v1.2.3


From 97a1a95b6192599e038a26704756a914368f3d3a Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 09:55:53 -0800
Subject: Try to fix up IR emit for subscript calls

This code isn't especially useful right now since most of the important subscripts are still special-cased with `__intrinsic_op`, but the idea is that if we de-mangle an intrinsic operation's name and see it is called `operator[]` then we are probably calling a subscript, and should emit an appropriate expression.

Aside: this change has pointed out to me that our current name mangling isn't properly handling non-alphanumeric characters, so we'll be in trouble as soon as we have non-intrinsic subscripts, operators, etc.
---
 source/slang/emit.cpp | 49 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 10 deletions(-)

(limited to 'source')

diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index 7e4fca4e5..615fb47a0 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -4664,7 +4664,7 @@ emitDeclImpl(decl, nullptr);
             expect("_S");
         }
 
-        int readCount()
+        UInt readCount()
         {
             int c = peek();
             if(!isDigit((char)c))
@@ -4822,15 +4822,50 @@ emitDeclImpl(decl, nullptr);
         IRCall*         inst,
         IRFunc*         func)
     {
-        // TODO: we need to inspect the mangled name,
-        // and construct a suitable expression from it...
+        // For a call with N arguments, the instruction will
+        // have N+1 operands. We will start consuming operands
+        // starting at the index 1.
+        UInt operandCount = inst->getArgCount();
+        UInt argCount = operandCount - 1;
+        UInt operandIndex = 1;
 
-        UnmangleContext um(func->mangledName);
+        // Our current strategy for dealing with intrinsic
+        // calls is to "un-mangle" the mangled name, in
+        // order to figure out what the user was originally
+        // calling. This is a bit messy, and there might
+        // be better strategies (including just stuffing
+        // a pointer to the original decl onto the callee).
 
+        UnmangleContext um(func->mangledName);
         um.startUnmangling();
 
+        // We'll read through the qualified name of the
+        // symbol (e.g., `Texture2D<T>.Sample`) and then
+        // only keep the last segment of the name (e.g.,
+        // the `Sample` part).
         auto name = um.readSimpleName();
 
+        // We will special-case some names here, that
+        // represent callable declarations that aren't
+        // ordinary functions, and thus may use different
+        // syntax.
+        if(name == "operator[]")
+        {
+            // The user is invoking a built-in subscript operator
+            emit("(");
+            emitIROperand(ctx, inst->getArg(operandIndex++));
+            emit(")[");
+            emitIROperand(ctx, inst->getArg(operandIndex++));
+            emit("]");
+
+            if(operandIndex < operandCount)
+            {
+                emit(" = ");
+                emitIROperand(ctx, inst->getArg(operandIndex++));
+            }
+            return;
+        }
+
         // The mangled function name currently records
         // the number of explicit parameters, and thus
         // doesn't include the implicit `this` parameter.
@@ -4838,12 +4873,6 @@ emitDeclImpl(decl, nullptr);
         // to figure out whether we have a member function call.
         UInt paramCount = um.readParamCount();
 
-        // For a call with N arguments, the instruction will
-        // have N+1 operands.
-        UInt operandCount = inst->getArgCount();
-        UInt argCount = operandCount - 1;
-        UInt operandIndex = 1;
-
         if(argCount != paramCount)
         {
             // Looks like a member function call
-- 
cgit v1.2.3


From f4c4f63c0cfad93b1eacf9300eb8e06d2c78ccc9 Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 10:02:10 -0800
Subject: Fix for emitting subscript calls in HLSL/GLSL

The old approach was relying on an `__intrinsic_op` modifier to tell us we need to do something special with an `InvokeExpr`, but a previous change removed a bunch of those modifiers.

Instead, we will now check for calls to subscript declarations as part of the normal flow of emitting *any* call, similar to what is done for constructor calls already.

Eventually we should be able to eliminate the special case in the `__intrinsic_op` path, but I'm holding off on that because the AST emit logic can probably be cleaned up a *lot* once it doesn't have to be used for cross-compilation as well.
---
 source/slang/emit.cpp | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'source')

diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index 615fb47a0..c67bb7b29 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -1614,6 +1614,32 @@ struct EmitVisitor
         }
     }
 
+    void emitSimpleSubscriptCallExpr(
+        RefPtr<InvokeExpr>  callExpr,
+        EOpInfo             /*outerPrec*/)
+    {
+        auto funcExpr = callExpr->FunctionExpr;
+
+        // We expect any subscript operation to be invoked as a member,
+        // so the function expression had better be in the correct form.
+        auto memberExpr = funcExpr.As<MemberExpr>();
+        if(!memberExpr)
+        {
+            SLANG_UNEXPECTED("subscript needs base expression");
+        }
+
+        Emit("(");
+        EmitExpr(memberExpr->BaseExpression);
+        Emit(")[");
+        UInt argCount = callExpr->Arguments.Count();
+        for (UInt aa = 0; aa < argCount; ++aa)
+        {
+            if (aa != 0) Emit(", ");
+            EmitExpr(callExpr->Arguments[aa]);
+        }
+        Emit("]");
+    }
+
     // Emit a call expression that doesn't involve any special cases,
     // just an expression of the form `f(a0, a1, ...)`
     void emitSimpleCallExpr(
@@ -1632,6 +1658,18 @@ struct EmitVisitor
                 emitSimpleConstructorCallExpr(callExpr, outerPrec);
                 return;
             }
+
+            if(auto acessorDeclRef = declRef.As<AccessorDecl>())
+            {
+                declRef = acessorDeclRef.GetParent();
+            }
+
+            if(auto subscriptDeclRef = declRef.As<SubscriptDecl>())
+            {
+                emitSimpleSubscriptCallExpr(callExpr, outerPrec);
+                return;
+            }
+
         }
 
         // Once we've ruled out constructor calls, we can move on
-- 
cgit v1.2.3


From ccea5702442a7a8303e6735a038be86939c1ce7a Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoley@nvidia.com>
Date: Tue, 7 Nov 2017 10:22:53 -0800
Subject: Emit pointer-type parameters as out params

The IR encodes `out` and `in out` function parameters as pointer types, so the emit logic needs to handle it. We had code to handle translation of pointer types into `out` declarations for function *declarations* but weren't handling it for function *definitions*.

This change unifies the logic so that it is shared by function definitions and declarations.

This change does *not* deal with the following issues that need to be addressed sometime soon-ish:

- We currently always translate pointers into `out`, even if they should be `in out`. This is obviously wrong.

- If/when we eventually have targets that support true pointers (e.g., CUDA, NVIDIA OpenGL, etc.) we'll need a way to tell the difference between an `in` pointer parameter, and an `out` parameter.

Both of these issues are meant to be addressed by having a few special cases of pointer types, for the `out` and `in out` cases, and only translating those (not all pointers). We need to plumb those through the IR more completely, but I'm not dealing with that here.
---
 source/slang/emit.cpp | 51 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

(limited to 'source')

diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index c67bb7b29..617e25d71 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -5698,7 +5698,8 @@ emitDeclImpl(decl, nullptr);
                 emit(", ");
 
             auto paramName = getIRName(pp);
-            emitIRType(ctx, pp->getType(), paramName);
+            auto paramType = pp->getType();
+            emitIRParamType(ctx, paramType, paramName);
 
             emitIRSemantics(ctx, pp);
         }
@@ -5724,6 +5725,33 @@ emitDeclImpl(decl, nullptr);
         }
     }
 
+    void emitIRParamType(
+        EmitContext*    ctx,
+        Type*           type,
+        String const&   name)
+    {
+        // An `out` or `inout` parameter will have been
+        // encoded as a parameter of pointer type, so
+        // we need to decode that here.
+        //
+        if( auto ptrType = type->As<PtrType>() )
+        {
+            // TODO: we need a way to distinguish `out`
+            // from `inout`. The easiest way to do
+            // that might be to have each be a distinct
+            // sub-case of `IRPtrType` - this would also
+            // ensure that they can be distinguished from
+            // real pointers when the user means to use
+            // them.
+
+            emit("out ");
+
+            type = ptrType->getValueType();
+        }
+
+        emitIRType(ctx, type, name);
+    }
+
     void emitIRFuncDecl(
         EmitContext*    ctx,
         IRFunc*         func)
@@ -5771,26 +5799,7 @@ emitDeclImpl(decl, nullptr);
             paramName.append(pp);
             auto paramType = funcType->getParamType(pp);
 
-            // An `out` or `inout` parameter will have been
-            // encoded as a parameter of pointer type, so
-            // we need to decode that here.
-            //
-            if( auto ptrType = paramType->As<PtrType>() )
-            {
-                // TODO: we need a way to distinguish `out`
-                // from `inout`. The easiest way to do
-                // that might be to have each be a distinct
-                // sub-case of `IRPtrType` - this would also
-                // ensure that they can be distinguished from
-                // real pointers when the user means to use
-                // them.
-
-                emit("out ");
-
-                paramType = ptrType->getValueType();
-            }
-
-            emitIRType(ctx, paramType, paramName);
+            emitIRParamType(ctx, paramType, paramName);
         }
         emit(");\n");
     }
-- 
cgit v1.2.3