7 files changed, 319 insertions, 5 deletions
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 000000000..441334055
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,47 @@
+name: Benchmark
+
+on:
+  push:
+    branches: [master]
+    paths-ignore:
+      - 'docs/**'
+      - 'LICENCE'
+      - 'CONTRIBUTION.md'
+      - 'README.md'
+  pull_request:
+    branches: [master]
+    paths-ignore:
+      - 'docs/**'
+      - 'LICENCE'
+      - 'CONTRIBUTION.md'
+      - 'README.md'
+
+jobs:
+  build:
+    runs-on: [Windows, benchmark, self-hosted]
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: 'true'
+          fetch-depth: '0'
+      - name: Common setup
+        uses: ./.github/actions/common-setup
+        with:
+          os: windows
+          compiler: cl
+          platform: x86_64
+          config: release
+          build-llvm: true
+      - name: Build Slang
+        run: |
+          cmake --preset default --fresh -DSLANG_SLANG_LLVM_FLAVOR=USE_SYSTEM_LLVM -DCMAKE_COMPILE_WARNING_AS_ERROR=false
+          cmake --workflow --preset release
+      - name: Setup  # third-party packages imported by tests/mdl/compile.py (argparse is stdlib)
+        run: |
+          cd tests/mdl
+          pip install prettytable halo
+      - name: Run benchmark
+        run: |
+          cd tests/mdl
+          Copy-Item -Path C:\slang-benchmarks -Destination . -Recurse
+          python compile.py --samples 16 --target dxil --ci
+\ No newline at end of file
diff --git a/source/core/slang-performance-profiler.cpp b/source/core/slang-performance-profiler.cpp
index b480e1c8e..f08b4998d 100644
--- a/source/core/slang-performance-profiler.cpp
+++ b/source/core/slang-performance-profiler.cpp
@@ -31,11 +31,14 @@ namespace Slang
         }
         virtual void getResult(StringBuilder& out) override
         {
-            for (auto func : data)
+            char buffer[512];
+            for (const auto& func : data)
             {
-                out << func.key << ": \t";
+                memset(buffer, 0, sizeof(buffer));
+                snprintf(buffer, sizeof(buffer), "[*] %30s", func.key);
+                out << buffer << " \t";
                 auto milliseconds = std::chrono::duration_cast< std::chrono::milliseconds >(func.value.duration);
-                out << func.value.invocationCount << "\t" << milliseconds.count() << "ms\n";
+                out << func.value.invocationCount << " \t" << milliseconds.count() << "ms\n";
             }
         }
         virtual void clear() override
diff --git a/source/core/slang-performance-profiler.h b/source/core/slang-performance-profiler.h
index 71b34d262..9895793c4 100644
--- a/source/core/slang-performance-profiler.h
+++ b/source/core/slang-performance-profiler.h
@@ -68,7 +68,9 @@ private:
     List<ProfileInfo> m_profilEntries;
 };
 
-#define SLANG_PROFILE PerformanceProfilerFuncRAIIContext _profileContext(__func__)
+#define SLANG_PROFILE            PerformanceProfilerFuncRAIIContext _profileContext(__func__)
+#define SLANG_PROFILE_SECTION(s) PerformanceProfilerFuncRAIIContext _profileContext##s(#s)
+
 }
 
 #endif
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 1ef17df1d..f5b7ff428 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -3,6 +3,7 @@
 #include "../core/slang-basic.h"
 #include "../core/slang-platform.h"
 #include "../core/slang-io.h"
+#include "../core/slang-performance-profiler.h"
 #include "../core/slang-string-util.h"
 #include "../core/slang-hex-dump-util.h"
 #include "../core/slang-riff.h"
@@ -2297,6 +2298,7 @@ namespace Slang
 
     void EndToEndCompileRequest::generateOutput()
     {
+        SLANG_PROFILE;
         generateOutput(getSpecializedGlobalAndEntryPointsComponentType());
 
         // If we are in command-line mode, we might be expected to actually
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index 9ba02ee50..81171fdcc 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -2742,6 +2742,7 @@ static void _outputIncludes(const List<SourceFile*>& sourceFiles, SourceManager*
 void FrontEndCompileRequest::parseTranslationUnit(
     TranslationUnitRequest* translationUnit)
 {
+    SLANG_PROFILE;
     if (translationUnit->isChecked)
         return;
 
@@ -2924,6 +2925,7 @@ void FrontEndCompileRequest::checkAllTranslationUnits()
 
 void FrontEndCompileRequest::generateIR()
 {
+    SLANG_PROFILE;
     SLANG_AST_BUILDER_RAII(getLinkage()->getASTBuilder());
 
     // Our task in this function is to generate IR code
@@ -3021,6 +3023,7 @@ static SourceLanguage inferSourceLanguage(FrontEndCompileRequest* request)
 
 SlangResult FrontEndCompileRequest::executeActionsInner()
 {
+    SLANG_PROFILE_SECTION(frontEndExecute);
     SLANG_AST_BUILDER_RAII(getLinkage()->getASTBuilder());
 
     for (TranslationUnitRequest* translationUnit : translationUnits)
@@ -3046,7 +3049,11 @@ SlangResult FrontEndCompileRequest::executeActionsInner()
         return SLANG_FAIL;
 
     // Perform semantic checking on the whole collection
-    checkAllTranslationUnits();
+    {
+        SLANG_PROFILE_SECTION(SemanticChecking);
+        checkAllTranslationUnits();
+    }
+
     if (getSink()->getErrorCount() != 0)
         return SLANG_FAIL;
 
@@ -3172,6 +3179,7 @@ void EndToEndCompileRequest::init()
 
 SlangResult EndToEndCompileRequest::executeActionsInner()
 {
+    SLANG_PROFILE_SECTION(endToEndActions);
     // If no code-generation target was specified, then try to infer one from the source language,
     // just to make sure we can do something reasonable when invoked from the command line.
     //
@@ -6303,6 +6311,7 @@ SlangResult EndToEndCompileRequest::compile()
     if (getOptionSet().getBoolOption(CompilerOptionName::ReportDownstreamTime))
     {
         getSession()->getCompilerElapsedTime(&totalStartTime, &downstreamStartTime);
+        PerformanceProfiler::getProfiler()->clear();
     }
 #if !defined(SLANG_DEBUG_INTERNAL_ERROR)
     // By default we'd like to catch as many internal errors as possible,
@@ -6317,6 +6326,7 @@ SlangResult EndToEndCompileRequest::compile()
 
     try
     {
+        SLANG_PROFILE_SECTION(compileInner);
         res = executeActions();
     }
     catch (const AbortCompilationException& e)
diff --git a/tests/mdl/.gitignore b/tests/mdl/.gitignore
new file mode 100644
index 000000000..1719095d1
--- /dev/null
+++ b/tests/mdl/.gitignore
@@ -0,0 +1,4 @@
+slang-benchmarks
+targets/
+modules/
+*.json
+\ No newline at end of file
diff --git a/tests/mdl/compile.py b/tests/mdl/compile.py
new file mode 100644
index 000000000..0d995636d
--- /dev/null
+++ b/tests/mdl/compile.py
@@ -0,0 +1,246 @@
+import os
+import shutil
+import glob
+import subprocess
+import argparse
+import halo
+import sys
+import prettytable
+import json
+
+### Setup ###
+
+def clear_mkdir(dir):
+    if os.path.exists(dir):
+        shutil.rmtree(dir)
+    os.makedirs(dir, exist_ok=True)
+
+clear_mkdir('modules')
+clear_mkdir('targets')
+clear_mkdir('targets/generated')
+
+target_choices = ['spirv', 'spirv-glsl', 'dxil', 'dxil-embedded']
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--target', type=str, default='spirv', choices=target_choices)
+parser.add_argument('--samples', type=int, default=1)
+parser.add_argument('--output', type=str, default='benchmarks.json')
+parser.add_argument('--ci', action='store_true')
+
+args = parser.parse_args(sys.argv[1:])
+
+repo = 'slang-benchmarks'
+if args.ci:
+    repo = 'C:\\slang-benchmarks'
+
+if not os.path.exists(repo):
+    repo_url = 'ssh://git@gitlab-master.nvidia.com:12051/slang/slang-benchmarks.git'
+    # Clone into `repo` so it stays a local path; the glob over `{repo}\mdl` below depends on that.
+    subprocess.check_call(['git', 'clone', repo_url, repo])
+
+dxc = 'dxc.exe'
+slangc = '..\\..\\build\\Release\\bin\\slangc.exe'
+target = args.target
+samples = args.samples
+
+if target == 'spirv':
+    target = 'spirv -emit-spirv-directly'
+    target_ext = 'spirv'
+    embed = False
+elif target == 'spirv-glsl':
+    target = 'spirv -emit-spirv-via-glsl'
+    target_ext = 'spirv'
+    embed = False
+elif target == 'dxil-embedded':
+    target_ext = 'dxil'
+    embed = True
+elif target == 'dxil':
+    target_ext = 'dxil'
+    embed = False
+
+print(f'slangc:  {slangc}')
+print(f'target:  {target}')
+print(f'samples: {samples}\n')
+
+### Utility ###
+
+def parse(results):
+    results = results.split('\n')
+    results = [ r for r in results if r.startswith('[*]') ]
+    results = [ r.split() for r in results ]
+    profile = {}
+    for r in results:
+        profile[r[1]] = float(r[-1][:-2])
+    return profile
+
+timings = {}
+def run(command, key):
+    profile = {}
+    for i in range(samples):
+        try:
+            results = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode('utf-8')
+        except subprocess.CalledProcessError as exc:
+            print(exc.output.decode('utf-8'))
+            return
+            # exit(-1)
+
+        p = parse(results)
+        if len(profile) == 0:
+            profile = p
+        else:
+            for k, v in p.items():
+                profile.setdefault(k, 0)
+                profile[k] += v
+
+    for k in profile:
+        profile[k] /= samples
+
+    timings[key] = profile
+
+def compile_cmd(file, output, stage=None, entry=None, emit=False):
+    cmd = f'{slangc} -report-perf-benchmark {file}'
+    # Build the slangc command line; when a stage is given the entry point defaults to the stage name.
+    if stage:
+        cmd += f' -stage {stage}'
+        if entry:
+            cmd += f' -entry {entry}'
+        else:
+            cmd += f' -entry {stage}'
+    # `target` carries the -emit-spirv-* flag selected above; 'dxil-embedded' is not rewritten there, so use target_ext.
+    if emit:
+        cmd += f' -target {target_ext if embed else target}'
+        output += '.' + target_ext
+    elif embed:
+        cmd += ' -embed-dxil'
+        cmd += ' -profile lib_6_6'
+        cmd += ' -incomplete-library'
+
+    cmd += f' -o {output}'
+
+    return cmd
+
+### Module precompilation ###
+
+modules = []
+
+for file in glob.glob(f'{repo}\\mdl\\*.slang'):
+    if file.endswith('hit.slang'):
+        run(compile_cmd(file, 'modules/closesthit.slang-module', stage='closesthit'), 'module/closesthit')
+        run(compile_cmd(file, 'modules/anyhit.slang-module', stage='anyhit'), 'module/anyhit')
+        run(compile_cmd(file, 'modules/shadow.slang-module', stage='anyhit', entry='shadow'), 'module/shadow')
+    else:
+        basename = os.path.basename(file)
+        run(compile_cmd(file, f'modules/{basename}-module'), 'module/' + file)
+        modules.append(f'modules/{basename}-module')
+
+    print(f'[I] compiled {file}.')
+
+### Entrypoint compilation ###
+hit = 'slang-benchmarks/mdl/hit.slang'
+files = ' '.join(modules)
+
+# Module
+cmd = compile_cmd(f'{files} modules/closesthit.slang-module', f'targets/dxr-ch-modules', stage='closesthit', emit=True)
+run(cmd, f'full/{target_ext}/module/closesthit')
+
+print(f'[I] compiled closesthit (module)')
+
+cmd = compile_cmd(f'{files} modules/anyhit.slang-module', f'targets/dxr-ah-modules', stage='anyhit', emit=True)
+run(cmd, f'full/{target_ext}/module/anyhit')
+
+print(f'[I] compiled anyhit (module)')
+
+cmd = compile_cmd(f'{files} modules/shadow.slang-module', f'targets/dxr-sh-modules', stage='anyhit', entry='shadow', emit=True)
+run(cmd, f'full/{target_ext}/module/shadow')
+
+print(f'[I] compiled shadow (module)')
+
+# Monolithic: compile hit.slang straight to the target, one run per entry point
+cmd = compile_cmd(hit, f'targets/dxr-ch-mono', stage='closesthit', emit=True)
+run(cmd, f'full/{target_ext}/mono/closesthit')
+
+print(f'[I] compiled closesthit (monolithic)')
+
+cmd = compile_cmd(hit, f'targets/dxr-ah-mono', stage='anyhit', emit=True)
+run(cmd, f'full/{target_ext}/mono/anyhit')
+
+print(f'[I] compiled anyhit (monolithic)')
+
+cmd = compile_cmd(hit, f'targets/dxr-sh-mono', stage='anyhit', entry='shadow', emit=True)
+run(cmd, f'full/{target_ext}/mono/shadow')
+
+print(f'[I] compiled shadow (monolithic)')
+
+# Module precompilation time
+precompilation_time = 0
+for k in timings:
+    if k.startswith('module'):
+        precompilation_time += timings[k]['compileInner']
+
+timings[f'full/{target_ext}/precompilation'] = { 'compileInner': precompilation_time }
+
+# Output to benchmark file
+json_data = []
+for k, v in timings.items():
+    if not k.startswith('full'):
+        continue
+
+    name = k.split('/')[1:]
+    name = ' : '.join(reversed(name))
+
+    data = {
+        'name': name,
+        'unit': 'milliseconds',
+        'value': v['compileInner']
+    }
+
+    json_data.append(data)
+
+# TODO: append target to benchmark file name
+with open(args.output, 'w') as file:
+    json.dump(json_data, file, indent=4)
+
+# Generate readable Markdown as well (inner quotes differ from the outer ones: required before Python 3.12)
+print(4 * '\n')
+print('# Slang MDL benchmark results\n')
+print('## Module precompilation time\n')
+print(f'Total: **{timings[f"full/{target_ext}/precompilation"]["compileInner"]} ms**\n')
+
+print('## Module compilation for entry points\n')
+
+entries = [ 'Closest Hit', 'Any Hit', 'Shadow' ]
+prefixes = [ 'closesthit', 'anyhit', 'shadow' ]
+
+table = prettytable.PrettyTable()
+table.set_style(prettytable.MARKDOWN)
+table.field_names = [ 'Entry', 'Total' ]
+
+# One row per entry point; the running sum feeds the 'Total' line printed below.
+total = 0
+for entry, prefix in zip(entries, prefixes):
+    # Values are the averaged profiler timings, which parse() reads from '<N>ms' fields.
+    db = timings[f'full/{target_ext}/module/{prefix}']
+    spCompile = db['compileInner']
+    table.add_row([ entry, f'{spCompile:.3f} ms' ])
+    total += spCompile
+
+print(f'Total: **{total} ms**\n')
+print(table, end='\n\n')
+
+print('## Monolithic compilation for entry points\n')
+
+table = prettytable.PrettyTable()
+table.set_style(prettytable.MARKDOWN)
+table.field_names = [ 'Entry', 'Total' ]
+
+# One row per entry point; the running sum feeds the 'Total' line printed below.
+total = 0
+for entry, prefix in zip(entries, prefixes):
+    # Values are the averaged profiler timings, which parse() reads from '<N>ms' fields.
+    db = timings[f'full/{target_ext}/mono/{prefix}']
+    spCompile = db['compileInner']
+    table.add_row([ entry, f'{spCompile:.3f} ms' ])
+    total += spCompile
+
+print(f'Total: **{total} ms**\n')
+print(table, end='\n\n')
+\ No newline at end of file