diff options
| author | Yong He <yonghe@outlook.com> | 2023-10-19 03:49:42 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-10-19 18:49:42 +0800 |
| commit | d001a7b5eee4400150816e9962adaff183bfff35 (patch) | |
| tree | e6c7d25258aba6056f231886d55cbb6963859c42 /tools | |
| parent | 7826afcaad78cc33c976bb3db3cdc9eada4c77e8 (diff) | |
Add a tool to dump/replay compute pipeline creation from gfx. (#3275)
* Add a tool to dump/replay compute pipeline creation from gfx.
* Fix x86 build.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/gfx/vulkan/vk-device.cpp | 19 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-pipeline-dump-layer.cpp | 152 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-pipeline-dump-layer.h | 11 | ||||
| -rw-r--r-- | tools/vk-pipeline-create/main.cpp | 287 |
4 files changed, 469 insertions, 0 deletions
diff --git a/tools/gfx/vulkan/vk-device.cpp b/tools/gfx/vulkan/vk-device.cpp index f8bc9c3aa..fe3680eda 100644 --- a/tools/gfx/vulkan/vk-device.cpp +++ b/tools/gfx/vulkan/vk-device.cpp @@ -15,9 +15,12 @@ #include "vk-swap-chain.h" #include "vk-transient-heap.h" #include "vk-vertex-layout.h" +#include "vk-pipeline-dump-layer.h" #include "vk-helper-functions.h" +#include "source/core/slang-platform.h" + #ifdef GFX_NV_AFTERMATH # include "GFSDK_Aftermath.h" # include "GFSDK_Aftermath_Defines.h" @@ -32,8 +35,20 @@ using namespace Slang; namespace vk { +static bool shouldDumpPipeline() +{ +/* Dumping is enabled only when the SLANG_GFX_DUMP_PIPELINE environment variable reads exactly "1". */ StringBuilder dumpPipelineSettings; + PlatformUtil::getEnvironmentVariable(toSlice("SLANG_GFX_DUMP_PIPELINE"), dumpPipelineSettings); + return dumpPipelineSettings.produceString() == "1"; +} + DeviceImpl::~DeviceImpl() { + if (shouldDumpPipeline()) + { + writePipelineDump(toSlice("gfx-vk-pipeline-dump.bin")); + } + // Check the device queue is valid else, we can't wait on it.. if (m_deviceQueue.isValid()) { @@ -782,6 +797,10 @@ Result DeviceImpl::initVulkanInstanceAndDevice( SLANG_RETURN_ON_FAIL(m_api.initDeviceProcs(m_device)); + if (shouldDumpPipeline()) + { + installPipelineDumpLayer(m_api); + } return SLANG_OK; } diff --git a/tools/gfx/vulkan/vk-pipeline-dump-layer.cpp b/tools/gfx/vulkan/vk-pipeline-dump-layer.cpp new file mode 100644 index 000000000..959eee15d --- /dev/null +++ b/tools/gfx/vulkan/vk-pipeline-dump-layer.cpp @@ -0,0 +1,152 @@ +#include "vk-pipeline-dump-layer.h" +#include "core/slang-basic.h" +#include "core/slang-stream.h" +namespace gfx { + using namespace Slang; + +/* Records Vulkan create-infos into one byte blob (serializedBytes); each dictionary maps a handle to the blob offset at which its record starts. */ struct PipelineDumpContext + { + Dictionary<VkPipelineLayout, Index> pipelineLayouts; + Dictionary<VkShaderModule, Index> shaderModules; + Dictionary<VkDescriptorSetLayout, Index> descriptorSets; + Dictionary<VkPipeline, Index> computePipelines; + + List<uint8_t> serializedBytes; + + VulkanApi api; + +/* Appends the sizeof(T) raw bytes of v to serializedBytes. */ template<typename T> + void writeRaw(T v) + { + auto startIndex = serializedBytes.getCount(); + 
serializedBytes.growToCount(startIndex + sizeof(T)); + memcpy(serializedBytes.getBuffer() + startIndex, &v, sizeof(T)); + } + +/* Writes elementCount as a uint32_t, then elementCount raw T values copied from data. */ template<typename T> + void writeArray(uint32_t elementCount, const T* data) + { + writeRaw(elementCount); + + auto startIndex = serializedBytes.getCount(); + serializedBytes.growToCount(startIndex + sizeof(T) * elementCount); + memcpy(serializedBytes.getBuffer() + startIndex, data, sizeof(T) * elementCount); + } + +/* Writes a uint32_t length that counts the terminating NUL, then the characters followed by the NUL. */ void writeStr(const char* str) + { + auto len = (uint32_t)strlen(str) + 1; + writeRaw(len); + + auto startIndex = serializedBytes.getCount(); + serializedBytes.growToCount(startIndex + len); + memcpy(serializedBytes.getBuffer() + startIndex, str, len - 1); + serializedBytes[startIndex + len - 1] = 0; + } + +/* Records sType, flags and setLayoutCount, then one blob offset per set layout (looked up in descriptorSets), then the push-constant range array. */ void writePipelineLayout(VkPipelineLayout layout, const VkPipelineLayoutCreateInfo* createInfo) + { + auto startIndex = serializedBytes.getCount(); + writeRaw(createInfo->sType); + writeRaw(createInfo->flags); + writeRaw(createInfo->setLayoutCount); + for (uint32_t i = 0; i < createInfo->setLayoutCount; i++) + writeRaw(descriptorSets.getValue(createInfo->pSetLayouts[i])); + writeArray(createInfo->pushConstantRangeCount, createInfo->pPushConstantRanges); + pipelineLayouts[layout] = startIndex; + } + +/* Records sType, flags and the code as an array of uint32_t words (codeSize / sizeof(uint32_t) of them). */ void writeShaderModule(VkShaderModule module, const VkShaderModuleCreateInfo* createInfo) + { + auto startIndex = serializedBytes.getCount(); + writeRaw(createInfo->sType); + writeRaw(createInfo->flags); + writeArray((uint32_t)(createInfo->codeSize/sizeof(uint32_t)), createInfo->pCode); + shaderModules[module] = startIndex; + } + +/* Records sType, flags and the binding array. NOTE(review): the bindings are memcpy'd whole, so any pointer field inside them is stored as a raw address - confirm the replay side never dereferences it. */ void writeDescriptorSetLayout(VkDescriptorSetLayout layout, const VkDescriptorSetLayoutCreateInfo* createInfo) + { + auto startIndex = serializedBytes.getCount(); + writeRaw(createInfo->sType); + writeRaw(createInfo->flags); + writeArray(createInfo->bindingCount, createInfo->pBindings); + descriptorSets[layout] = startIndex; + } + +/* Records the compute create-info; the shader module and the pipeline layout are stored as the blob offsets of their earlier records, the entry point name via writeStr. */ void writePipeline(VkPipeline pipeline, const 
VkComputePipelineCreateInfo* createInfo) + { + auto startIndex = serializedBytes.getCount(); + writeRaw(createInfo->sType); + writeRaw(createInfo->flags); + writeRaw(createInfo->stage.sType); + writeRaw(createInfo->stage.flags); + writeRaw(createInfo->stage.stage); + writeRaw(shaderModules.getValue(createInfo->stage.module)); + writeStr(createInfo->stage.pName); + writeRaw(pipelineLayouts.getValue(createInfo->layout)); + computePipelines[pipeline] = startIndex; + } + +/* File layout: uint32_t pipeline count, one Index offset per recorded pipeline, Index blob size, then the blob bytes. NOTE(review): the results of init/write are not checked here. */ void writeToFile(UnownedStringSlice path) + { + RefPtr<FileStream> fs = new FileStream(); + fs->init(path, FileMode::Create); + uint32_t pipelineCount = (uint32_t)computePipelines.getCount(); + fs->write(&pipelineCount, sizeof(uint32_t)); + for (auto& pair : computePipelines) + { + fs->write(KeyValueDetail::getValue(&pair), sizeof(Index)); + } + Index blobSize = serializedBytes.getCount(); + fs->write(&blobSize, sizeof(blobSize)); + fs->write(serializedBytes.getBuffer(), serializedBytes.getCount()); + fs->close(); + } + }; + +/* Single global recorder used by every hook below. */ PipelineDumpContext dumpContext; + +/* The four hooks below forward to the entry point saved in dumpContext.api and then record the create-info under the returned handle. NOTE(review): the VkResult is not checked before recording - confirm the output handle is meaningful when the call fails. */ VkResult SLANG_MCALL createPipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo* createInfo, const VkAllocationCallbacks* callbacks, VkPipelineLayout* outLayout) + { + auto result = dumpContext.api.vkCreatePipelineLayout(device, createInfo, callbacks, outLayout); + dumpContext.writePipelineLayout(*outLayout, createInfo); + return result; + } + + VkResult SLANG_MCALL createComputePipelines(VkDevice device, VkPipelineCache cache, uint32_t createInfoCount, const VkComputePipelineCreateInfo* createInfos, const VkAllocationCallbacks* callbacks, VkPipeline* outPipelines) + { + auto result = dumpContext.api.vkCreateComputePipelines(device, cache, createInfoCount, createInfos, callbacks, outPipelines); + for (uint32_t i = 0; i < createInfoCount; i++) + dumpContext.writePipeline(outPipelines[i], createInfos + i); + return result; + } + + VkResult SLANG_MCALL createShaderModule(VkDevice device, const VkShaderModuleCreateInfo* createInfo, 
const VkAllocationCallbacks* callbacks, VkShaderModule* outShaderModule) + { + auto result = dumpContext.api.vkCreateShaderModule(device, createInfo, callbacks, outShaderModule); + dumpContext.writeShaderModule(*outShaderModule, createInfo); + return result; + } + + VkResult SLANG_MCALL createDescriptorSetLayout(VkDevice device, const VkDescriptorSetLayoutCreateInfo* createInfo, const VkAllocationCallbacks* callbacks, VkDescriptorSetLayout* outDescSetLayout) + { + auto result = dumpContext.api.vkCreateDescriptorSetLayout(device, createInfo, callbacks, outDescSetLayout); + dumpContext.writeDescriptorSetLayout(*outDescSetLayout, createInfo); + return result; + } + +/* Saves a copy of the current function table in dumpContext.api, then redirects four create entry points of api to the hooks above. */ void installPipelineDumpLayer(VulkanApi& api) + { + dumpContext.api = api; + api.vkCreatePipelineLayout = createPipelineLayout; + api.vkCreateComputePipelines = createComputePipelines; + api.vkCreateShaderModule = createShaderModule; + api.vkCreateDescriptorSetLayout = createDescriptorSetLayout; + } + +/* Writes everything recorded so far to path. */ void writePipelineDump(UnownedStringSlice path) + { + dumpContext.writeToFile(path); + } +} // namespace gfx diff --git a/tools/gfx/vulkan/vk-pipeline-dump-layer.h b/tools/gfx/vulkan/vk-pipeline-dump-layer.h new file mode 100644 index 000000000..c514f7f3e --- /dev/null +++ b/tools/gfx/vulkan/vk-pipeline-dump-layer.h @@ -0,0 +1,11 @@ +// vk-pipeline-dump-layer.h +#include "vk-api.h" + +#include "core/slang-string.h" + +namespace gfx { + +void installPipelineDumpLayer(VulkanApi& api); +void writePipelineDump(Slang::UnownedStringSlice path); + +} // namespace gfx diff --git a/tools/vk-pipeline-create/main.cpp b/tools/vk-pipeline-create/main.cpp new file mode 100644 index 000000000..893eecfd1 --- /dev/null +++ b/tools/vk-pipeline-create/main.cpp @@ -0,0 +1,287 @@ +// main.cpp + +// This tool reads a gfx pipeline dump file and replays the pipeline creation to trigger +// shader compilation in the driver. 
+//
+#include <slang.h>
+#include <slang-com-ptr.h>
+
+#include "examples/hello-world/vulkan-api.h"
+#include "../../source/core/slang-string-util.h"
+#include "../../source/core/slang-stream.h"
+#include "slang-gfx.h"
+#include <chrono>
+#include <stdio.h>
+#include <string.h>
+
+#if SLANG_WINDOWS_FAMILY
+# include <windows.h>
+#else
+# include <dlfcn.h>
+#endif
+
+using namespace Slang;
+
+/// Replays the compute pipeline creations stored in a gfx pipeline dump file.
+///
+/// File layout, as consumed by `createComputePipelineFromShader`:
+///     uint32_t pipelineCount
+///     Index    pipelineOffsets[pipelineCount]   (byte offsets into the blob)
+///     Index    blobSize
+///     uint8_t  blob[blobSize]
+/// `Index` values are read with their native size, so presumably a dump has to be
+/// replayed by a build whose `Index` width matches the writer's -- TODO confirm.
+struct PipelineCreationReplay
+{
+    // The Vulkan function pointers that result from loading the Vulkan library.
+    VulkanAPI vkAPI;
+
+    // Objects already re-created during replay, keyed by the blob offset of their record.
+    Dictionary<Index, VkPipelineLayout> pipelineLayouts;
+    Dictionary<Index, VkDescriptorSetLayout> descSetLayouts;
+    Dictionary<Index, VkShaderModule> shaderModules;
+    Dictionary<Index, VkPipeline> pipelines;
+
+    VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
+    VkPipeline pipeline = VK_NULL_HANDLE;
+
+    int initVulkanInstanceAndDevice();
+
+    // The serialized records, and the blob offset of every recorded pipeline.
+    List<uint8_t> fileBlob;
+    List<Index> pipelineOffsets;
+
+    /// Sequential cursor over the dump blob. It performs no bounds checking; callers
+    /// are expected to hand it offsets that lie inside the blob.
+    struct Reader
+    {
+        Index position;
+        List<uint8_t>& fileBlob;
+
+        // Members are initialized in declaration order (position, then fileBlob).
+        Reader(List<uint8_t>& blob, Index pos) : position(pos), fileBlob(blob) {}
+
+        /// Copies sizeof(T) bytes at the cursor into `val` and advances the cursor.
+        template<typename T>
+        void readRaw(T& val)
+        {
+            memcpy(&val, fileBlob.getBuffer() + position, sizeof(T));
+            position += sizeof(T);
+        }
+
+        Index readIndex()
+        {
+            Index index = 0;
+            readRaw(index);
+            return index;
+        }
+
+        uint32_t readUInt32()
+        {
+            uint32_t value = 0;
+            readRaw(value);
+            return value;
+        }
+
+        /// Reads a uint32_t length followed by that many bytes and returns a pointer to
+        /// those bytes inside the blob (no copy is made).
+        const char* readString()
+        {
+            uint32_t len = readUInt32();
+            auto result = (const char*)fileBlob.getBuffer() + position;
+            position += len;
+            return result;
+        }
+
+        /// Pointer to the byte at the cursor; the cursor is not advanced.
+        const char* getPtr() { return (const char*)fileBlob.getBuffer() + position; }
+    };
+
+    /// Creates (or returns the cached) shader module whose record starts at `offset`.
+    /// Returns VK_NULL_HANDLE when the driver rejects the module.
+    VkShaderModule loadShaderModule(Index offset)
+    {
+        VkShaderModule shader = VK_NULL_HANDLE;
+        if (shaderModules.tryGetValue(offset, shader))
+            return shader;
+
+        Reader reader(fileBlob, offset);
+        VkShaderModuleCreateInfo createInfo = {};
+        reader.readRaw(createInfo.sType);
+        reader.readRaw(createInfo.flags);
+        const uint32_t wordCount = reader.readUInt32();
+
+        // Records are packed into the blob without padding, so the code may start at
+        // an address that is not suitably aligned for uint32_t. Copy it into uint32_t
+        // storage instead of pointing pCode into the byte blob.
+        List<uint32_t> code;
+        code.setCount(wordCount);
+        if (wordCount != 0)
+            memcpy(code.getBuffer(), reader.getPtr(), sizeof(uint32_t) * wordCount);
+        createInfo.codeSize = sizeof(uint32_t) * (size_t)wordCount;
+        createInfo.pCode = code.getBuffer();
+
+        if (vkAPI.vkCreateShaderModule(vkAPI.device, &createInfo, nullptr, &shader) != VK_SUCCESS)
+        {
+            printf("Failed to create shader module at offset %lld.\n", (long long)offset);
+            shader = VK_NULL_HANDLE;
+        }
+        shaderModules[offset] = shader;
+
+        return shader;
+    }
+
+    /// Creates (or returns the cached) descriptor set layout whose record starts at
+    /// `offset`. Returns VK_NULL_HANDLE when the driver rejects the layout.
+    VkDescriptorSetLayout loadDescriptorSetLayout(Index offset)
+    {
+        VkDescriptorSetLayout layout = VK_NULL_HANDLE;
+        if (descSetLayouts.tryGetValue(offset, layout))
+            return layout;
+
+        Reader reader(fileBlob, offset);
+        VkDescriptorSetLayoutCreateInfo createInfo = {};
+        reader.readRaw(createInfo.sType);
+        reader.readRaw(createInfo.flags);
+        reader.readRaw(createInfo.bindingCount);
+
+        List<VkDescriptorSetLayoutBinding> bindings;
+        bindings.setCount(createInfo.bindingCount);
+        if (createInfo.bindingCount != 0)
+        {
+            memcpy(
+                bindings.getBuffer(),
+                reader.getPtr(),
+                sizeof(VkDescriptorSetLayoutBinding) * createInfo.bindingCount);
+        }
+        // The bindings are raw struct bytes from the recording process, so a non-null
+        // pImmutableSamplers would be an address that means nothing here. The dump does
+        // not contain the samplers themselves, so replay without immutable samplers.
+        for (Index i = 0; i < bindings.getCount(); i++)
+            bindings[i].pImmutableSamplers = nullptr;
+        createInfo.pBindings = bindings.getBuffer();
+
+        if (vkAPI.vkCreateDescriptorSetLayout(vkAPI.device, &createInfo, nullptr, &layout) != VK_SUCCESS)
+        {
+            printf("Failed to create descriptor set layout at offset %lld.\n", (long long)offset);
+            layout = VK_NULL_HANDLE;
+        }
+        descSetLayouts[offset] = layout;
+        return layout;
+    }
+
+    /// Creates (or returns the cached) pipeline layout whose record starts at `offset`,
+    /// creating the descriptor set layouts it refers to on the way.
+    VkPipelineLayout loadPipelineLayout(Index offset)
+    {
+        VkPipelineLayout layout = VK_NULL_HANDLE;
+        if (pipelineLayouts.tryGetValue(offset, layout))
+            return layout;
+
+        Reader reader(fileBlob, offset);
+        VkPipelineLayoutCreateInfo createInfo = {};
+        reader.readRaw(createInfo.sType);
+        reader.readRaw(createInfo.flags);
+        reader.readRaw(createInfo.setLayoutCount);
+
+        // Each set layout is stored as the blob offset of its own record.
+        List<VkDescriptorSetLayout> setLayouts;
+        for (uint32_t i = 0; i < createInfo.setLayoutCount; i++)
+        {
+            setLayouts.add(loadDescriptorSetLayout(reader.readIndex()));
+        }
+        createInfo.pSetLayouts = setLayouts.getBuffer();
+
+        reader.readRaw(createInfo.pushConstantRangeCount);
+        List<VkPushConstantRange> pushConstants;
+        pushConstants.setCount(createInfo.pushConstantRangeCount);
+        if (createInfo.pushConstantRangeCount != 0)
+        {
+            memcpy(
+                pushConstants.getBuffer(),
+                reader.getPtr(),
+                sizeof(VkPushConstantRange) * createInfo.pushConstantRangeCount);
+        }
+        createInfo.pPushConstantRanges = pushConstants.getBuffer();
+
+        if (vkAPI.vkCreatePipelineLayout(vkAPI.device, &createInfo, nullptr, &layout) != VK_SUCCESS)
+        {
+            printf("Failed to create pipeline layout at offset %lld.\n", (long long)offset);
+            layout = VK_NULL_HANDLE;
+        }
+        pipelineLayouts[offset] = layout;
+        return layout;
+    }
+
+    /// Re-creates the compute pipeline whose record starts at `offset`, prints how long
+    /// the driver took, and destroys the pipeline again. `id` is only used for output.
+    void loadPipeline(Index id, Index offset)
+    {
+        printf("Creating pipeline %d...", (int)id);
+        // Make the progress text visible even if pipeline creation takes a long time.
+        fflush(stdout);
+
+        Reader reader(fileBlob, offset);
+        VkComputePipelineCreateInfo createInfo = {};
+        reader.readRaw(createInfo.sType);
+        reader.readRaw(createInfo.flags);
+        reader.readRaw(createInfo.stage.sType);
+        reader.readRaw(createInfo.stage.flags);
+        reader.readRaw(createInfo.stage.stage);
+        createInfo.stage.module = loadShaderModule(reader.readIndex());
+        createInfo.stage.pName = reader.readString();
+        createInfo.layout = loadPipelineLayout(reader.readIndex());
+
+        VkPipeline createdPipeline = VK_NULL_HANDLE;
+
+        // Time only the driver call, not the console output.
+        auto startTime = std::chrono::high_resolution_clock::now();
+        const VkResult result = vkAPI.vkCreateComputePipelines(
+            vkAPI.device, VK_NULL_HANDLE, 1, &createInfo, nullptr, &createdPipeline);
+        auto endTime = std::chrono::high_resolution_clock::now();
+
+        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime);
+        printf(result == VK_SUCCESS ? "done" : "failed");
+        printf(" in %.2fs.\n", elapsed.count() / 1000.0);
+
+        if (result == VK_SUCCESS)
+            vkAPI.vkDestroyPipeline(vkAPI.device, createdPipeline, nullptr);
+    }
+
+    /// Loads the dump at `path` and replays pipeline `pipelineIndex`, or every pipeline
+    /// when `pipelineIndex` is -1. Returns 0 on success and -1 when the file cannot be
+    /// read, is truncated or malformed, or `pipelineIndex` is out of range.
+    int createComputePipelineFromShader(UnownedStringSlice path, Int pipelineIndex)
+    {
+        RefPtr<FileStream> f = new FileStream();
+        if (SLANG_FAILED(f->init(path, FileMode::Open)))
+        {
+            printf("Failed to open the pipeline dump file.\n");
+            return -1;
+        }
+
+        // Reads exactly `size` bytes; a short read means the file is truncated.
+        auto readExact = [&](void* dst, size_t size) -> bool
+        {
+            size_t readBytes = 0;
+            f->read(dst, size, readBytes);
+            return readBytes == size;
+        };
+
+        uint32_t pipelineCount = 0;
+        bool ok = readExact(&pipelineCount, sizeof(pipelineCount));
+        for (uint32_t i = 0; ok && i < pipelineCount; ++i)
+        {
+            Index offset = 0;
+            ok = readExact(&offset, sizeof(offset));
+            if (ok)
+                pipelineOffsets.add(offset);
+        }
+
+        Index blobSize = 0;
+        ok = ok && readExact(&blobSize, sizeof(blobSize)) && blobSize >= 0;
+        if (ok)
+        {
+            fileBlob.setCount(blobSize);
+            ok = readExact(fileBlob.getBuffer(), sizeof(uint8_t) * (size_t)blobSize);
+        }
+        // Every pipeline record has to start inside the blob.
+        for (Index i = 0; ok && i < pipelineOffsets.getCount(); ++i)
+        {
+            ok = pipelineOffsets[i] >= 0 && pipelineOffsets[i] < blobSize;
+        }
+        if (!ok)
+        {
+            printf("The pipeline dump file is truncated or malformed.\n");
+            return -1;
+        }
+
+        int result = 0;
+        const Index availableCount = pipelineOffsets.getCount();
+        if (pipelineIndex == -1)
+        {
+            for (Index i = 0; i < availableCount; ++i)
+            {
+                loadPipeline(i, pipelineOffsets[i]);
+            }
+        }
+        else if (pipelineIndex >= 0 && pipelineIndex < availableCount)
+        {
+            loadPipeline(pipelineIndex, pipelineOffsets[pipelineIndex]);
+        }
+        else
+        {
+            printf(
+                "Pipeline index %d is out of range; the dump contains %d pipeline(s).\n",
+                (int)pipelineIndex,
+                (int)availableCount);
+            result = -1;
+        }
+
+        // Release everything the replay created, whether or not it succeeded.
+        for (auto& p : descSetLayouts)
+            vkAPI.vkDestroyDescriptorSetLayout(vkAPI.device, *KeyValueDetail::getValue(&p), nullptr);
+        for (auto& p : pipelineLayouts)
+            vkAPI.vkDestroyPipelineLayout(vkAPI.device, *KeyValueDetail::getValue(&p), nullptr);
+        for (auto& p : shaderModules)
+            vkAPI.vkDestroyShaderModule(vkAPI.device, *KeyValueDetail::getValue(&p), nullptr);
+
+        return result;
+    }
+
+    int run(int argc, const char** argv);
+
+    void initVulkanAPI(gfx::IDevice* device);
+};
+
+int main(int argc, const char** argv)
+{
+    PipelineCreationReplay app;
+    return app.run(argc, argv);
+}
+
+/// Entry point of the tool: `vk-pipeline-create <path-to-pipeline-file> [pipeline-index]`.
+/// Returns 0 on success and -1 on any failure.
+int PipelineCreationReplay::run(int argc, const char** argv)
+{
+    // Validate the command line before paying for device creation.
+    if (argc < 2)
+    {
+        printf("Usage: vk-pipeline-create <path-to-pipeline-file> [pipeline-index]\n");
+        return -1;
+    }
+    UnownedStringSlice path = UnownedStringSlice(argv[1]);
+    Int pipelineIndex = -1;
+    if (argc > 2)
+    {
+        if (SLANG_FAILED(StringUtil::parseInt(UnownedStringSlice(argv[2]), pipelineIndex)))
+        {
+            printf("Invalid pipeline index '%s'.\n", argv[2]);
+            return -1;
+        }
+    }
+
+    gfx::IDevice::Desc deviceDesc = {};
+    deviceDesc.deviceType = gfx::DeviceType::Vulkan;
+    ComPtr<gfx::IDevice> device;
+    if (SLANG_FAILED(gfx::gfxCreateDevice(&deviceDesc, device.writeRef())) || !device)
+    {
+        printf("Failed to create a Vulkan device.\n");
+        return -1;
+    }
+    initVulkanAPI(device);
+
+    // The instance and device handles are borrowed from `device`. Clear the destroy
+    // entry points right away so that no exit path below leaves them set; presumably
+    // VulkanAPI would otherwise destroy handles it does not own -- TODO confirm
+    // against VulkanAPI's destructor.
+    vkAPI.vkDestroyDevice = nullptr;
+    vkAPI.vkDestroyDebugReportCallbackEXT = nullptr;
+    vkAPI.vkDestroyInstance = nullptr;
+
+    if (!vkAPI.vulkanLibraryHandle || !vkAPI.vkCreateComputePipelines)
+    {
+        printf("Failed to load Vulkan.\n");
+        return -1;
+    }
+
+    RETURN_ON_FAIL(createComputePipelineFromShader(path, pipelineIndex));
+    return 0;
+}
+
+/// Fills `vkAPI` with the instance/device handles of `device` and with function
+/// pointers loaded from the system Vulkan library. When the library cannot be loaded
+/// the function pointers are left untouched and `vkAPI.vulkanLibraryHandle` is null.
+void PipelineCreationReplay::initVulkanAPI(gfx::IDevice* device)
+{
+    gfx::IDevice::InteropHandles handle;
+    device->getNativeDeviceHandles(&handle);
+    // As used here, slot 0 carries the VkInstance and slot 2 the VkDevice.
+    vkAPI.device = (VkDevice)(handle.handles[2].handleValue);
+    vkAPI.instance = (VkInstance)(handle.handles[0].handleValue);
+#if SLANG_WINDOWS_FAMILY
+    auto dynamicLibraryName = "vulkan-1.dll";
+    HMODULE module = ::LoadLibraryA(dynamicLibraryName);
+    vkAPI.vulkanLibraryHandle = (void*)module;
+#define VK_API_GET_GLOBAL_PROC(x) vkAPI.x = (PFN_##x)GetProcAddress(module, #x);
+#else
+    auto dynamicLibraryName = "libvulkan.so.1";
+    vkAPI.vulkanLibraryHandle = dlopen(dynamicLibraryName, RTLD_NOW);
+#define VK_API_GET_GLOBAL_PROC(x) vkAPI.x = (PFN_##x)dlsym(vkAPI.vulkanLibraryHandle, #x);
+#endif
+
+    if (!vkAPI.vulkanLibraryHandle)
+    {
+        printf("Failed to load %s.\n", dynamicLibraryName);
+        return;
+    }
+
+    // Initialize all the global functions.
+    VK_API_ALL_GLOBAL_PROCS(VK_API_GET_GLOBAL_PROC);
+
+    vkAPI.initInstanceProcs();
+    vkAPI.initDeviceProcs();
+}
+
+/// Creates a stand-alone Vulkan instance and device through `initializeVulkanDevice`.
+/// Returns 0 on success and -1 on failure.
+int PipelineCreationReplay::initVulkanInstanceAndDevice()
+{
+    if (initializeVulkanDevice(vkAPI) != 0)
+    {
+        printf("Failed to load Vulkan.\n");
+        return -1;
+    }
+    return 0;
+}
