#pragma warning disable CS0649 // Field is never assigned to
using ComLight;
using System.ComponentModel;
using System.Runtime.InteropServices;
namespace Whisper.Internal
{
/// <summary>Size of the buffers owned by the object: the count of segments and the total count of tokens</summary>
/// <remarks>
/// Returned by <see cref="iTranscribeResult.getSize" />.
/// The fields are never assigned in C# code, which is why CS0649 is suppressed at the top of the file.
/// NOTE(review): presumably the value is produced by native code, so the field order and types must match the native structure; confirm before changing.
/// </remarks>
public readonly struct sTranscribeLength
{
/// <summary>Count of segments</summary>
public readonly int countSegments;
/// <summary>Total count of tokens, for all segments combined</summary>
public readonly int countTokens;
}
/// <summary>Output data from the model</summary>
/// <remarks>
/// ComLight interface declared with <c>eMarshalDirection.ToManaged</c>.
/// The two pointer getters return raw addresses without a length; the element counts come from <see cref="getSize" />.
/// NOTE(review): presumably the order of the methods must match the native vtable; do not reorder without confirming.
/// </remarks>
[ComInterface( "2871a73f-5ce3-48f8-8779-6582ee11935e", eMarshalDirection.ToManaged ), CustomConventions( typeof( NativeLogger ) )]
public interface iTranscribeResult
{
/// <summary>Get size of the buffers</summary>
/// <returns>Count of segments, and total count of tokens for all segments combined</returns>
[RetValIndex, EditorBrowsable( EditorBrowsableState.Never )]
public sTranscribeLength getSize();
/// <summary>Pointer to segment data, a vector of structures</summary>
/// <remarks><c>Whisper.TranscribeResult</c> wraps this address into a span of <c>countSegments</c> elements, and only calls the method when that count is positive.</remarks>
[EditorBrowsable( EditorBrowsableState.Never )]
public IntPtr getSegments();
/// <summary>Pointer to tokens data, a vector of structures</summary>
/// <remarks><c>Whisper.TranscribeResult</c> wraps this address into a span of <c>countTokens</c> elements, and only calls the method when that count is positive.</remarks>
[EditorBrowsable( EditorBrowsableState.Never )]
public IntPtr getTokens();
}
}
namespace Whisper
{
/// <summary>Start and end times of a segment or token</summary>
/// <remarks>
/// The times are relative to the start of the media.
/// The structure is embedded by value in both <see cref="sSegment" /> and <see cref="sToken" />.
/// </remarks>
public readonly struct sTimeInterval
{
/// <summary>Start time</summary>
public readonly TimeSpan begin;
/// <summary>End time</summary>
public readonly TimeSpan end;
}
/// <summary>Data of a single segment of the transcription</summary>
/// <remarks>
/// <see cref="TranscribeResult" /> reinterprets native memory as a sequence of these structures,
/// so the instance fields below are kept in their original declaration order.
/// </remarks>
public readonly struct sSegment
{
	// Address of the segment text; decoded as UTF-8 by the `text` property
	internal readonly IntPtr m_text;

	/// <summary>Start and end times of the segment</summary>
	public readonly sTimeInterval time;

	/// <summary>Index of the first token of this segment, within the tokens of all segments combined</summary>
	public readonly int firstToken;

	/// <summary>Count of tokens in this segment</summary>
	public readonly int countTokens;

	/// <summary>Segment text</summary>
	/// <remarks>The string is decoded from the UTF-8 bytes at the stored address on every access; the result is not cached.</remarks>
	public string? text
	{
		get
		{
			return Marshal.PtrToStringUTF8( m_text );
		}
	}
}
/// <summary>Bit flags attached to a token</summary>
[Flags]
public enum eTokenFlags: uint
{
	/// <summary>The token is special</summary>
	Special = 1u << 0,
}
/// <summary>Data of a single token of the transcription</summary>
/// <remarks>
/// <see cref="TranscribeResult" /> reinterprets native memory as a sequence of these structures,
/// so the instance fields below are kept in their original declaration order.
/// </remarks>
public readonly struct sToken
{
	// Address of the token text; decoded as UTF-8 by the `text` property
	internal readonly IntPtr m_text;

	/// <summary>Start and end times of the token</summary>
	public readonly sTimeInterval time;

	/// <summary>Probability of the token</summary>
	public readonly float probability;

	/// <summary>Probability of the timestamp token</summary>
	public readonly float probabilityTimestamp;

	/// <summary>Sum of probabilities of all timestamp tokens</summary>
	public readonly float ptsum;

	/// <summary>Voice length of the token</summary>
	public readonly float vlen;

	/// <summary>Token id</summary>
	public readonly int id;

	// Token flags; not exposed directly, query them with hasFlag()
	private readonly eTokenFlags flags;

	/// <summary>Token text</summary>
	/// <remarks>The string is decoded from the UTF-8 bytes at the stored address on every access; the result is not cached.</remarks>
	public string? text
	{
		get
		{
			return Marshal.PtrToStringUTF8( m_text );
		}
	}

	/// <summary>True if the token flags has the specified bit set</summary>
	/// <param name="bit">Flag, or combination of flags, to test; all of the supplied bits must be set for the result to be true</param>
	public bool hasFlag( eTokenFlags bit )
	{
		// Same rule as Enum.HasFlag: every bit of the argument must be present in the stored value
		return ( flags & bit ) == bit;
	}
}
/// <summary>Output data from the model</summary>
/// <remarks>
/// The spans are views over native memory; nothing is copied to the managed heap.
/// NOTE(review): presumably the spans are only valid while the native result object is alive and unchanged; confirm the lifetime rules with the native side.
/// </remarks>
public readonly ref struct TranscribeResult
{
	/// <summary>Segments in the results</summary>
	public readonly ReadOnlySpan<sSegment> segments;

	/// <summary>Tokens in the results, for all segments</summary>
	public readonly ReadOnlySpan<sToken> tokens;

	/// <summary>Wrap the buffers exposed by the native result object into read-only spans</summary>
	/// <param name="i">Native result object which supplies the buffer sizes and addresses</param>
	internal TranscribeResult( Internal.iTranscribeResult i )
	{
		Internal.sTranscribeLength len = i.getSize();
		unsafe
		{
			// This does not copy the buffers to managed memory.
			// Instead, the C# spans directly reference the native memory stored in these std::vectors.
			// The pointer getters are only called for non-empty buffers.
			if( len.countSegments > 0 )
				segments = new ReadOnlySpan<sSegment>( (void*)i.getSegments(), len.countSegments );
			else
				segments = ReadOnlySpan<sSegment>.Empty;

			if( len.countTokens > 0 )
				tokens = new ReadOnlySpan<sToken>( (void*)i.getTokens(), len.countTokens );
			else
				tokens = ReadOnlySpan<sToken>.Empty;
		}
	}

	/// <summary>Get tokens for the specified segment</summary>
	/// <param name="seg">Segment whose <c>firstToken</c> and <c>countTokens</c> fields select the slice</param>
	/// <returns>Slice of <see cref="tokens" /> which belongs to the segment</returns>
	/// <exception cref="ArgumentOutOfRangeException">The slice described by the segment is outside of <see cref="tokens" /></exception>
	public ReadOnlySpan<sToken> getTokens( in sSegment seg ) =>
		tokens.Slice( seg.firstToken, seg.countTokens );
}
}