1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
#include "stdafx.h"
#include "miscUtils.h"
#include <cmath>
void setCurrentThreadName( const char* threadName )
{
const DWORD dwThreadID = GetCurrentThreadId();
// https://stackoverflow.com/a/10364541/126995
#pragma pack(push,8)
typedef struct tagTHREADNAME_INFO
{
DWORD dwType; // Must be 0x1000.
LPCSTR szName; // Pointer to name (in user addr space).
DWORD dwThreadID; // Thread ID (-1=caller thread).
DWORD dwFlags; // Reserved for future use, must be zero.
} THREADNAME_INFO;
#pragma pack(pop)
THREADNAME_INFO info;
info.dwType = 0x1000;
info.szName = threadName;
info.dwThreadID = dwThreadID;
info.dwFlags = 0;
constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
__try
{
RaiseException( MS_VC_EXCEPTION, 0, sizeof( info ) / sizeof( ULONG_PTR ), (ULONG_PTR*)&info );
}
__except( EXCEPTION_EXECUTE_HANDLER )
{
}
}
// Compute pow( mul / div, -0.25 ), i.e. 1.0 / fourth_root( mul / div ), and return it as FP32.
// The arithmetic is done in FP64 with scalar SSE2 instructions, and rounded to FP32 once at the very end.
// Inputs are not validated: a zero div or a negative ratio simply yields whatever IEEE 754
// arithmetic produces for the division and the square roots (e.g. NaN for a negative ratio).
float computeScaling( int mul, int div )
{
	// Make int32 vector with both numbers in the two lowest lanes: [ mul, div, 0, 0 ]
	// _mm_set_epi32 only needs SSE2, so the whole function no longer depends on SSE4.1
	const __m128i iv = _mm_set_epi32( 0, 0, div, mul );
	// Convert both numbers to FP64: [ (double)mul, (double)div ]
	__m128d v = _mm_cvtepi32_pd( iv );
	// Compute mul / div in the lowest lane; the upper lane is broadcast to supply the divisor
	v = _mm_div_sd( v, _mm_unpackhi_pd( v, v ) );
	// Square root
	v = _mm_sqrt_sd( v, v );
	// Square root of the square root = 4-th root
	v = _mm_sqrt_sd( v, v );
	// Invert the value to make the exponent negative
	v = _mm_div_sd( _mm_set_sd( 1.0 ), v );
	// Downcast the lowest lane to FP32, and return the result
	const __m128 f32 = _mm_cvtsd_ss( _mm_setzero_ps(), v );
	return _mm_cvtss_f32( f32 );
}
|