1 files changed, 266 insertions, 7 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 48d73fa93..b2006a7e8 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -13,6 +13,14 @@
 
 #include <cuda_runtime_api.h>
 
+// TODO: should conditionalize this on OptiX being present
+#ifdef RENDER_TEST_OPTIX
+#include <optix.h>
+#include <optix_function_table_definition.h>
+#define _CRT_SECURE_NO_WARNINGS 1
+#include <optix_stubs.h>
+#endif
+
 namespace renderer_test {
 using namespace Slang;
 
@@ -111,6 +119,36 @@ static SlangResult _handleCUDAError(cudaError_t error)
 
 #define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; }
 
+#ifdef RENDER_TEST_OPTIX
+
+static bool _isError(OptixResult result) { return result != OPTIX_SUCCESS; }
+
+#if 1
+static SlangResult _handleOptixError(OptixResult result, char const* file, int line)
+{
+    fprintf(stderr, "%s(%d): optix: %s (%s)\n",
+        file,
+        line,
+        optixGetErrorString(result),
+        optixGetErrorName(result));
+    return SLANG_FAIL;
+}
+#define SLANG_OPTIX_HANDLE_ERROR(RESULT) _handleOptixError(RESULT, __FILE__, __LINE__)
+#else
+#define SLANG_OPTIX_HANDLE_ERROR(RESULT) SLANG_FAIL
+#endif
+
+#define SLANG_OPTIX_RETURN_ON_FAIL(EXPR) do { auto _res = EXPR; if(_isError(_res)) return SLANG_OPTIX_HANDLE_ERROR(_res); } while(0)
+
+void _optixLogCallback(unsigned int level, const char* tag, const char* message, void* userData)
+{
+    fprintf(stderr, "optix: %s (%s)\n",
+        message,
+        tag);
+}
+
+#endif
+
 class MemoryCUDAResource : public CUDAResource
 {
 public:
@@ -1202,17 +1240,238 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
     ScopeCUDAContext cudaContext;
     SLANG_RETURN_ON_FAIL(cudaContext.init(0));
 
-    const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute);
-    if (index < 0)
+
+    switch( outputAndLayout.output.desc.pipelineType )
     {
+    default:
         return SLANG_FAIL;
-    }
 
-    const auto& kernel = outputAndLayout.output.kernelDescs[index];
+    case PipelineType::Compute:
+        {
+            const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute);
+            if (index < 0)
+            {
+                return SLANG_FAIL;
+            }
+
+            const auto& kernel = outputAndLayout.output.kernelDescs[index];
+
+            ScopeCUDAModule cudaModule;
+            SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin));
+            SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, dispatchSize, outContext));
+        }
+        break;
+
+    case PipelineType::RayTracing:
+        {
+#ifdef RENDER_TEST_OPTIX
+            SLANG_OPTIX_RETURN_ON_FAIL(optixInit());
+
+            OptixDeviceContextOptions optixOptions = {};
+
+            // TODO: set log callback
+            optixOptions.logCallbackFunction = &_optixLogCallback;
+            optixOptions.logCallbackLevel = 4;
+
+            OptixDeviceContext optixContext = nullptr;
+            SLANG_OPTIX_RETURN_ON_FAIL(optixDeviceContextCreate(cudaContext, &optixOptions, &optixContext));
+
+            enum { kOptixLogSize = 2*1024 };
+            char log[kOptixLogSize];
+            size_t logSize = sizeof(log);
+
+            OptixPipelineCompileOptions optixPipelineCompileOptions = {};
+
+            // We need to load modules from the PTX code available to us,
+            // and then also create program groups from the kernels
+            // in those modules.
+            //
+            // For now we will only support program groups with a single
+            // kernel in them, and will create one per entry point.
+            //
+            Index entryPointCount = outputAndLayout.output.kernelDescs.getCount();
+            List<OptixProgramGroup> optixProgramGroups;
+            List<String> names;
+
+            OptixShaderBindingTable optixSBT = {};
+
+            for( Index ee = 0; ee < entryPointCount; ++ee )
+            {
+                auto& kernel = outputAndLayout.output.kernelDescs[ee];
 
-    ScopeCUDAModule cudaModule;
-    SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin));
-    SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, dispatchSize, outContext));
+                OptixModuleCompileOptions optixModuleCompileOptions = {};
+
+                OptixModule optixModule;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixModuleCreateFromPTX(
+                    optixContext,
+                    &optixModuleCompileOptions,
+                    &optixPipelineCompileOptions,
+                    (char const*) kernel.codeBegin,
+                    kernel.getCodeSize(),
+                    log,
+                    &logSize,
+                    &optixModule));
+
+
+                OptixProgramGroupOptions optixProgramGroupOptions = {};
+
+                OptixProgramGroupDesc optixProgramGroupDesc = {};
+                optixProgramGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+                optixProgramGroupDesc.raygen.module = optixModule;
+
+                String name = String("__raygen__") + kernel.entryPointName;
+                names.add(name);
+                optixProgramGroupDesc.raygen.entryFunctionName = name.begin();
+
+                OptixProgramGroup optixProgramGroup = nullptr;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate(
+                    optixContext,
+                    &optixProgramGroupDesc,
+                    1,
+                    &optixProgramGroupOptions,
+                    log,
+                    &logSize,
+                    &optixProgramGroup));
+
+                optixProgramGroups.add(optixProgramGroup);
+
+                {
+                    CUdeviceptr rayGenRecordPtr;
+                    size_t rayGenRecordSize = OPTIX_SBT_RECORD_HEADER_SIZE;
+
+                    SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &rayGenRecordPtr, rayGenRecordSize));
+
+                    struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } rayGenRecordData;
+                    SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(optixProgramGroup, &rayGenRecordData));
+
+                    SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
+                        (void*) rayGenRecordPtr,
+                        &rayGenRecordData,
+                        rayGenRecordSize,
+                        cudaMemcpyHostToDevice));
+
+                    optixSBT.raygenRecord = rayGenRecordPtr;
+                }
+            }
+
+
+
+            OptixPipeline optixPipeline = nullptr;
+
+            OptixPipelineLinkOptions optixPipelineLinkOptions = {};
+            optixPipelineLinkOptions.maxTraceDepth = 5;
+            optixPipelineLinkOptions.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
+            optixPipelineLinkOptions.overrideUsesMotionBlur = false;
+            SLANG_OPTIX_RETURN_ON_FAIL(optixPipelineCreate(
+                optixContext,
+                &optixPipelineCompileOptions,
+                &optixPipelineLinkOptions,
+                optixProgramGroups.getBuffer(),
+                (unsigned int)optixProgramGroups.getCount(),
+                log,
+                &logSize,
+                &optixPipeline));
+
+
+            {
+                // The OptiX API complains if we don't fill in a miss record
+                // in the SBT, so we will create a dummy one here to represent
+                // the lack of any miss shaders.
+                //
+                OptixProgramGroupOptions optixProgramGroupOptions = {};
+                OptixProgramGroupDesc missGroupDesc = {};
+                missGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
+                OptixProgramGroup missProgramGroup;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate(
+                    optixContext,
+                    &missGroupDesc,
+                    1,
+                    &optixProgramGroupOptions,
+                    log,
+                    &logSize,
+                    &missProgramGroup));
+
+
+                CUdeviceptr missRecordPtr;
+                size_t missRecordSize = OPTIX_SBT_RECORD_HEADER_SIZE;
+
+                SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &missRecordPtr, missRecordSize));
+
+                struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } missRecordData;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(missProgramGroup, &missRecordData));
+
+                SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
+                    (void*) missRecordPtr,
+                    &missRecordData,
+                    missRecordSize,
+                    cudaMemcpyHostToDevice));
+
+                optixSBT.missRecordBase = missRecordPtr;
+                optixSBT.missRecordCount = 1;
+                optixSBT.missRecordStrideInBytes = missRecordSize;
+            }
+            {
+                // Okay, we also need a dummy hit group.
+
+                OptixProgramGroupOptions optixProgramGroupOptions = {};
+                OptixProgramGroupDesc hitGroupDesc = {};
+                hitGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+                OptixProgramGroup programGroup;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate(
+                    optixContext,
+                    &hitGroupDesc,
+                    1,
+                    &optixProgramGroupOptions,
+                    log,
+                    &logSize,
+                    &programGroup));
+
+
+                CUdeviceptr recordPtr;
+                size_t recordSize = OPTIX_SBT_RECORD_HEADER_SIZE;
+
+                SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &recordPtr, recordSize));
+
+                struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } recordData;
+                SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(programGroup, &recordData));
+
+                SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
+                    (void*) recordPtr,
+                    &recordData,
+                    recordSize,
+                    cudaMemcpyHostToDevice));
+
+                optixSBT.hitgroupRecordBase = recordPtr;
+                optixSBT.hitgroupRecordCount = 1;
+                optixSBT.hitgroupRecordStrideInBytes = recordSize;
+            }
+
+            ScopeCUDAStream cudaStream;
+
+            CUdeviceptr globalParams = 0;
+            size_t globalParamsSize = 0;
+
+            unsigned int gridSizeX = 1;
+            unsigned int gridSizeY = 1;
+            unsigned int gridSizeZ = 1;
+
+
+            SLANG_OPTIX_RETURN_ON_FAIL(optixLaunch(
+                optixPipeline,
+                cudaStream,
+                globalParams,
+                globalParamsSize,
+                &optixSBT,
+                gridSizeX,
+                gridSizeY,
+                gridSizeZ));
+
+
+            SLANG_RETURN_ON_FAIL(cudaStream.sync());
+#endif
+        }
+        break;
+    }
 
     return SLANG_OK;
 }