summaryrefslogtreecommitdiffstats
path: root/tools/gfx/cuda/cuda-device.cpp
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2022-08-10 10:04:06 -0400
committer: GitHub <noreply@github.com> 2022-08-10 10:04:06 -0400
commit: 1378fffd9da094beb41b2db89b96f556c23ab6cb (patch)
tree: e0558c031bd4b1a013dc56b75c4caa3ff5442021 /tools/gfx/cuda/cuda-device.cpp
parent: 89083c4b50af8e48e70b25b63cc62aca21ab706c (diff)
Artifact and ICastable (#2351)
* `#include` of an absolute path didn't work, because paths were always taken to be relative.
* WIP with hierarchical enums.
* Some small fixes and improvements around artifact desc related types.
* Improvements around hierarchical enum.
* Fixes to get Artifact types refactor to be able to execute tests.
* Attempt to better categorize PTX.
* Workaround for potentially unused function warning.
* Typo fix.
* Simplify Artifact header.
* Small improvements around Artifact kind/payload/style.
* Added IDestroyable/ICastable.
* Add IArtifactList.
* First impl of IArtifactUtil.
* Use the ICastable interface for IArtifactRepresentation.
* Added IArtifactRepresentation & IArtifactAssociated.
* Add SLANG_OVERRIDE to avoid gcc/clang warning.
* Fix calling convention issue on win32.
* Fix missing SLANG_OVERRIDE.
* First attempt at file abstraction around Artifact.
* Added creation of lock file.
* Move functionality for determining file paths to the IArtifactUtil. Add casting to ICastable.
* Added some casting/finding mechanisms.
* Simplify IArtifact interface, and use Items for file reps.
* Fix problem with libraries on DXIL.
* Split out ArtifactRepresentation.
* Move ArtifactDesc functionality to ArtifactDescUtil. ArtifactInfoUtil becomes ArtifactDescUtil.
* Split implementations from the interfaces for Artifact.
* Use TypeTextUtil for target name outputting.
* Add artifact impls.
* Add ICastableList.
* Added UnknownCastableAdapter.
* Make ISlangSharedLibrary derive from ICastable, and remain backwards compatible with slang-llvm.
* Refactor Representation on Artifact.
* Make our ISlangBlobs also derive from ICastable. Make ISlangBlob atomic ref counted.
* Fix typo.
Diffstat (limited to 'tools/gfx/cuda/cuda-device.cpp')
-rw-r--r--tools/gfx/cuda/cuda-device.cpp21
1 files changed, 15 insertions, 6 deletions
diff --git a/tools/gfx/cuda/cuda-device.cpp b/tools/gfx/cuda/cuda-device.cpp
index 1a4a142d0..be5dbbc96 100644
--- a/tools/gfx/cuda/cuda-device.cpp
+++ b/tools/gfx/cuda/cuda-device.cpp
@@ -1061,7 +1061,8 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::readTextureResource(
size_t* outPixelSize)
{
auto textureImpl = static_cast<TextureResourceImpl*>(texture);
- RefPtr<ListBlob> blob = new ListBlob();
+
+ List<uint8_t> blobData;
auto desc = textureImpl->getDesc();
auto width = desc->size.width;
@@ -1071,7 +1072,7 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::readTextureResource(
size_t pixelSize = sizeInfo.blockSizeInBytes / sizeInfo.pixelsPerBlock;
size_t rowPitch = width * pixelSize;
size_t size = height * rowPitch;
- blob->m_data.setCount((Index)size);
+ blobData.setCount((Index)size);
CUDA_MEMCPY2D copyParam;
memset(&copyParam, 0, sizeof(copyParam));
@@ -1080,7 +1081,7 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::readTextureResource(
copyParam.srcArray = textureImpl->m_cudaArray;
copyParam.dstMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.dstHost = blob->m_data.getBuffer();
+ copyParam.dstHost = blobData.getBuffer();
copyParam.dstPitch = rowPitch;
copyParam.WidthInBytes = copyParam.dstPitch;
copyParam.Height = height;
@@ -1088,6 +1089,9 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::readTextureResource(
*outRowPitch = rowPitch;
*outPixelSize = pixelSize;
+
+ auto blob = ListBlob::moveCreate(blobData);
+
returnComPtr(outBlob, blob);
return SLANG_OK;
}
@@ -1099,13 +1103,18 @@ SLANG_NO_THROW Result SLANG_MCALL DeviceImpl::readBufferResource(
ISlangBlob** outBlob)
{
auto bufferImpl = static_cast<BufferResourceImpl*>(buffer);
- RefPtr<ListBlob> blob = new ListBlob();
- blob->m_data.setCount((Index)size);
+
+ List<uint8_t> blobData;
+
+ blobData.setCount((Index)size);
cudaMemcpy(
- blob->m_data.getBuffer(),
+ blobData.getBuffer(),
(uint8_t*)bufferImpl->m_cudaMemory + offset,
size,
cudaMemcpyDefault);
+
+ auto blob = ListBlob::moveCreate(blobData);
+
returnComPtr(outBlob, blob);
return SLANG_OK;
}