25
25
#include < cstring>
26
26
#include < iostream>
27
27
#include < type_traits>
28
- #if defined(__F16C__)
29
- #include < x86intrin.h>
30
- #endif
31
28
32
29
namespace svs {
33
30
namespace float16 {
34
31
namespace detail {
35
32
36
/// Reinterpret the object representation of a 32-bit `float` as a `uint32_t`.
///
/// No numeric conversion takes place: the returned integer holds exactly the bytes of `x`.
///
/// @param x Value whose bytes are reinterpreted.
/// @return Unsigned integer with the same object representation as `x`.
inline uint32_t bitcast_float_to_uint32(const float x) {
    // Both implementations below require the two types to have the same size.
    static_assert(sizeof(float) == sizeof(uint32_t));
#if defined(__cpp_lib_bit_cast)
    return std::bit_cast<uint32_t>(x);
#else
    // The standard library does not advertise `std::bit_cast`: copy the bytes instead,
    // which is the well-defined way to type-pun without it.
    uint32_t u;
    std::memcpy(&u, &x, sizeof(x));
    return u;
#endif
}
43
37
44
38
/// Reinterpret the object representation of a `uint32_t` as a 32-bit `float`.
///
/// No numeric conversion takes place: the returned float holds exactly the bytes of `x`.
///
/// @param x Bit pattern to reinterpret.
/// @return Float with the same object representation as `x`.
inline float bitcast_uint32_to_float(const uint32_t x) {
    // Both implementations below require the two types to have the same size.
    static_assert(sizeof(float) == sizeof(uint32_t));
#if defined(__cpp_lib_bit_cast)
    return std::bit_cast<float>(x);
#else
    // The standard library does not advertise `std::bit_cast`: copy the bytes instead,
    // which is the well-defined way to type-pun without it.
    float f;
    std::memcpy(&f, &x, sizeof(x));
    return f;
#endif
}
50
42
51
43
// reference:
@@ -72,38 +64,12 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
72
64
0x7FFF ; // sign : normalized : denormalized : saturate
73
65
}
74
66
75
- // If the processor is new enough, we can use hardware intrinsics to perform the conversion
76
- // without using bit-level manipulation.
77
- //
78
- // Here, we check if the `F16C` set is enabled and if so, we define the intrinsic based
79
- // conversion functions.
80
- //
81
- // The entry point for users of the conversion is `*_to_*_untyped`, which will dispatch
82
- // to either the slow or fast version, depending on the architecture.
83
- #if defined(__F16C__)
84
- inline float float16_to_float_untyped_fast (const uint16_t x) {
85
- auto converted = _mm_cvtph_ps (_mm_set1_epi16 (std::bit_cast<int16_t >(x)));
86
- return _mm_cvtss_f32 (converted);
87
- }
88
- inline uint16_t float_to_float16_untyped_fast (const float x) {
89
- auto converted = _mm_cvtps_ph (__m128{x}, _MM_FROUND_NO_EXC);
90
- return _mm_extract_epi16 (converted, 0 );
91
- }
92
-
93
- inline float float16_to_float_untyped (const uint16_t x) {
94
- return float16_to_float_untyped_fast (x);
95
- }
96
- inline uint16_t float_to_float16_untyped (const float x) {
97
- return float_to_float16_untyped_fast (x);
98
- }
99
- #else
100
67
inline float float16_to_float_untyped (const uint16_t x) {
101
68
return float16_to_float_untyped_slow (x);
102
69
}
103
70
inline uint16_t float_to_float16_untyped (const float x) {
104
71
return float_to_float16_untyped_slow (x);
105
72
}
106
- #endif
107
73
} // namespace detail
108
74
109
75
// On GCC - we need to add this attribute so that Float16 members can appear inside
0 commit comments