summaryrefslogtreecommitdiffstats
path: root/Whisper/API/TranscribeStructs.h
diff options
context:
space:
mode:
authorKonstantin <const@const.me>2023-01-16 14:52:43 +0100
committerKonstantin <const@const.me>2023-01-16 14:52:43 +0100
commit8c4603c73675958efc960fbd4bb599a2909d106a (patch)
tree714dc6fc9a1672d5fd7f89676b97e10959662abc /Whisper/API/TranscribeStructs.h
parent990a8d0dbaefc996244097397259e92758b15cce (diff)
Source codes
Diffstat (limited to 'Whisper/API/TranscribeStructs.h')
-rw-r--r--Whisper/API/TranscribeStructs.h127
1 files changed, 127 insertions, 0 deletions
diff --git a/Whisper/API/TranscribeStructs.h b/Whisper/API/TranscribeStructs.h
new file mode 100644
index 0000000..ac28357
--- /dev/null
+++ b/Whisper/API/TranscribeStructs.h
@@ -0,0 +1,127 @@
+#pragma once
+#include <stdint.h>
+#include <assert.h>
+
+namespace Whisper
+{
+ enum struct eModelImplementation : uint32_t
+ {
+ GPU = 1,
+ Hybrid = 2,
+ Reference = 3,
+ };
+
+ struct sTimeSpanFields
+ {
+ uint32_t days;
+ uint8_t hours, minutes, seconds;
+ uint32_t ticks;
+
+ sTimeSpanFields( uint64_t tt )
+ {
+ ticks = (uint32_t)( tt % 10'000'000 );
+ tt /= 10'000'000;
+ seconds = (uint8_t)( tt % 60 );
+ tt /= 60;
+ minutes = (uint8_t)( tt % 60 );
+ tt /= 60;
+ hours = (uint8_t)( tt % 24 );
+ tt /= 24;
+ days = (uint32_t)tt;
+ }
+ };
+
+ struct sTimeSpan
+ {
+ uint64_t ticks;
+
+ operator sTimeSpanFields() const
+ {
+ return sTimeSpanFields{ ticks };
+ }
+ void operator=( uint64_t tt )
+ {
+ ticks = tt;
+ }
+ void operator=( int64_t tt )
+ {
+ assert( tt >= 0 );
+ ticks = (uint64_t)tt;
+ }
+ };
+
+ // Start and end times of the segment or token, expressed in 100-nanosecond ticks
+ struct sTimeInterval
+ {
+ sTimeSpan begin, end;
+ };
+
+ // Segment data
+ struct sSegment
+ {
+ // Segment text, null-terminated, and probably UTF-8 encoded
+ const char* text;
+ // Start and end times of the segment
+ sTimeInterval time;
+ uint32_t firstToken, countTokens;
+ };
+
+ enum eTokenFlags : uint32_t
+ {
+ None = 0,
+ Special = 1,
+ };
+ inline bool operator &( eTokenFlags a, eTokenFlags b )
+ {
+ return 0 != ( (uint32_t)a & (uint32_t)b );
+ }
+
+ // Token data
+ struct sToken
+ {
+ // Token text, null-terminated, and probably UTF-8 encoded
+ const char* text;
+ // Start and end times of the token
+ sTimeInterval time;
+ // Probability of the token
+ float probability;
+ // Probability of the timestamp token
+ float probabilityTimestamp;
+ // Sum of probabilities of all timestamp tokens
+ float ptsum;
+ // Voice length of the token
+ float vlen;
+ // Token id
+ int id;
+ eTokenFlags flags;
+ };
+
+ struct sTranscribeLength
+ {
+ uint32_t countSegments, countTokens;
+ };
+
+ enum struct eResultFlags : uint32_t
+ {
+ None = 0,
+ // Return individual tokens in addition to the segments
+ Tokens = 1,
+ // Return timestamps
+ Timestamps = 2,
+
+ // Create a new COM object for the results.
+ // Without this flag, the context returns a pointer to the COM object stored in the context.
+ // The content of that object is replaced every time you call iContext.getResults method
+ NewObject = 0x100,
+ };
+
+ inline eResultFlags operator |( eResultFlags a, eResultFlags b )
+ {
+ return (eResultFlags)( (uint32_t)a | (uint32_t)b );
+ }
+
+ inline bool operator &( eResultFlags a, eResultFlags b )
+ {
+ return 0 != ( (uint32_t)a & (uint32_t)b );
+ }
+} \ No newline at end of file