blob: 52039ebac68742a379f270ded294202dc1aa889a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#pragma once
#include "../Whisper/WhisperModel.h"
#include "../CPU/MlContext.h"
#include "../CPU/BufferAllocator.h"
#include "KeyValueDownloader.h"
#include "../CPU/KvTensors.h"
// This version of the hybrid context uses the new, custom-built kernels
// Declares the state and the public entry points of the hybrid decoding context.
// Only downloadKeyValues() is implemented in this header; every other member function is defined elsewhere.
// The class holds reference members, so it is not copy-assignable, and the referenced objects must outlive it.
class HybridContext
{
// Context object of the CpuCompute library; its role is defined in ../CPU/MlContext.h
CpuCompute::MlContext ml;
// Two independent VirtualAllocator instances.
// NOTE(review): the names suggest one arena for general compute temporaries and one for per-layer temporaries — confirm in the .cpp
CpuCompute::VirtualAllocator allocCompute, allocComputeLayer;
// Arena allocator implementing the CpuCompute::iArenaAllocator interface on top of one LargeBuffer.
// NOTE(review): the name and the `allocated` flag suggest it serves a single live allocation between resets;
// the implementation is not visible in this header — confirm
class AllocSingle : public CpuCompute::iArenaAllocator
{
// Backing storage for this allocator
CpuCompute::LargeBuffer buffer;
// Presumably the size of `buffer`, in bytes; starts at 0 — TODO confirm the unit
size_t capacity = 0;
// Starts false; presumably set while an allocation is outstanding — TODO confirm
bool allocated = false;
// Inherited via iArenaAllocator
// Declared in the private section of this class, i.e. not callable by name on an AllocSingle from outside.
// `cb` and `align` are presumably the requested byte count and alignment — TODO confirm against iArenaAllocator
virtual void* allocate( size_t cb, size_t align ) override final;
public:
// Override of iArenaAllocator::resetArena; defined elsewhere
virtual void resetArena() override final;
};
// NOTE(review): judging by the name, the allocator used for the output tensor of a layer — confirm
AllocSingle allocLayerOutput;
// Non-owning reference to the decoder tensors; how it is bound is decided by the constructor, which is defined elsewhere
const CpuCompute::DecoderTensors& model;
// Non-owning reference to the Whisper model
const Whisper::WhisperModel& whisperModel;
// Receives the key/value buffers passed to downloadKeyValues()
KeyValueDownloader kvCross;
// NOTE(review): presumably the key/value tensors maintained by the CPU decoder itself — confirm against ../CPU/KvTensors.h
CpuCompute::KvTensors kv;
// Forward declaration of a nested helper class; the definition is not in this header
class SetAllocatorRaii;
public:
// Construct the context for the specified model. The definition is not in this header.
// The class stores references, so the model objects must stay alive for as long as this context exists.
HybridContext( const Whisper::WhisperModel& wm );
// Initialization step which reports its status as HRESULT.
// NOTE(review): presumably must succeed before decode() is called — confirm
HRESULT create();
// Forward the supplied key/value buffers to kvCross.download(), and return its HRESULT unchanged
HRESULT downloadKeyValues( const DirectCompute::KeyValueBuffers& source )
{
return kvCross.download( source );
}
// Extra parameters for decode()
struct sDecParams
{
// NOTE(review): presumably the count of CPU threads to use — confirm
int n_threads;
// NOTE(review): meaning is not documented in this header — confirm against the decode() implementation
int M;
};
// Run the decoder; the definition is not in this header.
// tokens / n_tokens: pointer to the input token IDs, and their count
// n_past: NOTE(review) presumably the count of tokens processed by the previous calls — confirm
// dp: extra parameters, see sDecParams
// probs_out: output vector; NOTE(review) presumably receives the computed probabilities — confirm the layout and size
// Returns an HRESULT status code
HRESULT decode( const int* tokens, const int n_tokens, const int n_past, const sDecParams& dp, std::vector<float>& probs_out );
};
|