1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#pragma once
#include <stdint.h>
#include <assert.h>
namespace Whisper
{
// Identifies a model implementation; stored as a 32-bit unsigned integer.
// The named values start at 1, zero is not a named value.
// NOTE(review): the exact meaning of each value is inferred from its name only, confirm against the code which consumes this enum.
enum class eModelImplementation : uint32_t
{
	GPU = 1,
	Hybrid = 2,
	Reference = 3,
};
// A count of ticks split into days, hours, minutes, seconds, and the remaining ticks.
// The constructor treats 10,000,000 ticks as one second.
struct sTimeSpanFields
{
	// Count of complete days
	uint32_t days;
	// Hours within the day, [ 0 .. 23 ]
	uint8_t hours;
	// Minutes within the hour, [ 0 .. 59 ]
	uint8_t minutes;
	// Seconds within the minute, [ 0 .. 59 ]
	uint8_t seconds;
	// Ticks within the second, [ 0 .. 9,999,999 ]
	uint32_t ticks;

	// Split the total count of ticks into the individual fields.
	// The constructor is intentionally not explicit, an integer converts to this structure implicitly.
	sTimeSpanFields( uint64_t tt )
	{
		constexpr uint64_t ticksPerSecond = 10'000'000;
		constexpr uint64_t secondsPerMinute = 60;
		constexpr uint64_t minutesPerHour = 60;
		constexpr uint64_t hoursPerDay = 24;

		// Running totals, each one is the previous total converted to the larger unit
		const uint64_t totalSeconds = tt / ticksPerSecond;
		const uint64_t totalMinutes = totalSeconds / secondsPerMinute;
		const uint64_t totalHours = totalMinutes / minutesPerHour;

		days = static_cast<uint32_t>( totalHours / hoursPerDay );
		hours = static_cast<uint8_t>( totalHours % hoursPerDay );
		minutes = static_cast<uint8_t>( totalMinutes % minutesPerHour );
		seconds = static_cast<uint8_t>( totalSeconds % secondsPerMinute );
		ticks = static_cast<uint32_t>( tt % ticksPerSecond );
	}
};
// Time span stored as a single unsigned 64-bit count of ticks.
// Converts to sTimeSpanFields when the individual days / hours / minutes / seconds are needed.
struct sTimeSpan
{
	// Total count of ticks
	uint64_t ticks;

	// Break the count of ticks down into the individual fields
	operator sTimeSpanFields() const
	{
		return sTimeSpanFields{ ticks };
	}

	// Assign from an unsigned count of ticks.
	// Returns a reference to this object, so that assignments can be chained like the built-in ones.
	sTimeSpan& operator=( uint64_t tt )
	{
		ticks = tt;
		return *this;
	}

	// Assign from a signed count of ticks, which must not be negative.
	// The precondition is only checked by assert(); when asserts are compiled out,
	// a negative value is converted to a huge unsigned one by the cast.
	// Returns a reference to this object, so that assignments can be chained like the built-in ones.
	sTimeSpan& operator=( int64_t tt )
	{
		assert( tt >= 0 );
		ticks = static_cast<uint64_t>( tt );
		return *this;
	}
};
// Time interval of a segment or token.
// Both the start and the end are expressed in 100-nanosecond ticks.
struct sTimeInterval
{
	// Start time
	sTimeSpan begin;
	// End time
	sTimeSpan end;
};
// Data of a single segment
struct sSegment
{
	// Text of the segment: a null-terminated string, probably in UTF-8 encoding
	const char* text;

	// When the segment starts and when it ends
	sTimeInterval time;

	// NOTE(review): presumably the index of the first token which belongs to this segment, confirm against the producer of this structure
	uint32_t firstToken;
	// NOTE(review): presumably the count of tokens which belong to this segment, confirm against the producer of this structure
	uint32_t countTokens;
};
// Flags of a token, stored as a 32-bit unsigned integer.
// This is an unscoped enum, the value names are visible directly in the enclosing scope.
enum eTokenFlags : uint32_t
{
	// No flags are set
	None = 0x0,
	// NOTE(review): presumably marks tokens which are special, as opposed to the ones made of text; confirm against the code which sets this flag
	Special = 0x1,
};
// Test whether two sets of token flags have at least one bit in common.
// Unlike the built-in operator, the result is a boolean and not the intersection of the flags.
inline bool operator &( eTokenFlags a, eTokenFlags b )
{
	const uint32_t common = static_cast<uint32_t>( a ) & static_cast<uint32_t>( b );
	return common != 0;
}
// Token data
// Describes a single token: the text, the time interval, a few probability values, the ID, and the flags.
struct sToken
{
// Token text, null-terminated, and probably UTF-8 encoded
// NOTE(review): ownership and lifetime of the string are not visible in this header, confirm before keeping the pointer
const char* text;
// Start and end times of the token, expressed in 100-nanosecond ticks
sTimeInterval time;
// Probability of the token
float probability;
// Probability of the timestamp token
float probabilityTimestamp;
// Sum of probabilities of all timestamp tokens
float ptsum;
// Voice length of the token
// NOTE(review): the unit of this value is not visible in this header, confirm against the code which computes it
float vlen;
// Token id
// NOTE(review): presumably the index of the token in the vocabulary of the model, confirm
int id;
// Flags of the token, see the eTokenFlags enum
eTokenFlags flags;
};
// A pair of counters: count of segments, and count of tokens.
// NOTE(review): presumably describes the size of the transcription results, confirm against the code which fills this structure.
struct sTranscribeLength
{
	// Count of segments
	uint32_t countSegments;
	// Count of tokens
	uint32_t countTokens;
};
// Flags which control what is included in the results, and which object holds them.
// The values are individual bits, combine them with the | operator.
enum class eResultFlags : uint32_t
{
	// No flags are set
	None = 0x0,
	// Include the individual tokens, in addition to the segments
	Tokens = 0x1,
	// Include the timestamps
	Timestamps = 0x2,
	// Make a new COM object to hold the results.
	// When this flag is not set, the context returns a pointer to the COM object stored in the context,
	// and the content of that object is replaced by every call to iContext.getResults method
	NewObject = 0x100,
};
// Combine two sets of result flags, the result has every bit which is set in either argument
inline eResultFlags operator |( eResultFlags a, eResultFlags b )
{
	const uint32_t merged = static_cast<uint32_t>( a ) | static_cast<uint32_t>( b );
	return static_cast<eResultFlags>( merged );
}
// Test whether two sets of result flags have at least one bit in common.
// Unlike the built-in operator, the result is a boolean and not the intersection of the flags.
inline bool operator &( eResultFlags a, eResultFlags b )
{
	const uint32_t common = static_cast<uint32_t>( a ) & static_cast<uint32_t>( b );
	return common != 0;
}
}
|