Skip to content

Commit c9e1a34

Browse files
committed
The first draft of simulated double.
This can be compiled with cuda 12.6. It mainly focuses on the interface and forward the operation internally to double precision. If the corresponding instruction is also removed by the vendor, some of them might be able to simulate by more operations (or allowing reinterpret), but some of them like atomic on 64 bits or memory control will reuqire hardware.
1 parent e80ef1e commit c9e1a34

26 files changed

+943
-91
lines changed

accessor/cuda_helper.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -25,6 +25,8 @@ namespace gko {
2525

2626
class half;
2727

28+
class custom_double;
29+
2830

2931
namespace acc {
3032
namespace detail {
@@ -40,6 +42,11 @@ struct cuda_type<gko::half> {
4042
using type = __half;
4143
};
4244

45+
template <>
46+
struct cuda_type<double> {
47+
using type = gko::custom_double;
48+
};
49+
4350
// Unpack cv and reference / pointer qualifiers
4451
template <typename T>
4552
struct cuda_type<const T> {

accessor/reduced_row_major_reference.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -74,7 +74,7 @@ class reduced_storage
7474
operator=(arithmetic_type val) &&
7575
{
7676
storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_;
77-
*r_ptr = val;
77+
*r_ptr = detail::implicit_explicit_conversion<storage_type>(val);
7878
return val;
7979
}
8080

accessor/scaled_reduced_row_major_reference.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -76,7 +76,8 @@ class scaled_reduced_storage
7676
operator=(arithmetic_type val) &&
7777
{
7878
storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_;
79-
*r_ptr = val / scalar_;
79+
*r_ptr =
80+
detail::implicit_explicit_conversion<storage_type>(val / scalar_);
8081
return val;
8182
}
8283

common/cuda_hip/base/device_matrix_data_kernels.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -27,7 +27,7 @@ namespace components {
2727
// Although gko::is_nonzero is constexpr, it still shows calling __device__ in
2828
// __host__
2929
template <typename T>
30-
GKO_INLINE __device__ constexpr bool is_nonzero(T value)
30+
GKO_INLINE __device__ constexpr bool is_nonzero_(T value)
3131
{
3232
return value != zero<T>();
3333
}
@@ -43,7 +43,7 @@ void remove_zeros(std::shared_ptr<const DefaultExecutor> exec,
4343
// count nonzeros
4444
auto nnz = thrust::count_if(
4545
thrust_policy(exec), value_ptr, value_ptr + size,
46-
[] __device__(device_value_type value) { return is_nonzero(value); });
46+
[] __device__(device_value_type value) { return is_nonzero_(value); });
4747
if (nnz < size) {
4848
using tuple_type =
4949
thrust::tuple<IndexType, IndexType, device_value_type>;
@@ -59,7 +59,7 @@ void remove_zeros(std::shared_ptr<const DefaultExecutor> exec,
5959
as_device_type(new_values.get_data())));
6060
thrust::copy_if(thrust_policy(exec), it, it + size, out_it,
6161
[] __device__(tuple_type entry) {
62-
return is_nonzero(thrust::get<2>(entry));
62+
return is_nonzero_(thrust::get<2>(entry));
6363
});
6464
// swap out storage
6565
values = std::move(new_values);

common/cuda_hip/base/math.hpp

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -27,7 +27,7 @@
2727

2828

2929
#include "common/cuda_hip/base/thrust_macro.hpp"
30-
30+
#include "core/base/custom_double.hpp"
3131

3232
namespace gko {
3333

@@ -67,6 +67,14 @@ struct device_numeric_limits<__half> {
6767
}
6868
};
6969

70+
template <>
71+
GKO_INLINE constexpr custom_double one<custom_double>()
72+
{
73+
constexpr auto bits = static_cast<uint64>(
74+
0b0'01111111111'0000000000000000000000000000000000000000000000000000ull);
75+
return custom_double::create_from_bits(bits);
76+
}
77+
7078

7179
namespace detail {
7280

@@ -95,6 +103,9 @@ struct is_complex_impl<thrust::complex<T>> : public std::true_type {};
95103
template <>
96104
struct is_complex_or_scalar_impl<__half> : public std::true_type {};
97105

106+
template <>
107+
struct is_complex_or_scalar_impl<gko::custom_double> : public std::true_type {};
108+
98109
template <typename T>
99110
struct is_complex_or_scalar_impl<thrust::complex<T>>
100111
: public is_complex_or_scalar_impl<T> {};
@@ -125,6 +136,30 @@ GKO_ATTRIBUTES GKO_INLINE __half abs<__half>(const complex<__half>& z)
125136
}
126137

127138

139+
template <>
140+
GKO_ATTRIBUTES GKO_INLINE complex<gko::custom_double> sqrt<gko::custom_double>(
141+
const complex<gko::custom_double>& a)
142+
{
143+
auto result =
144+
sqrt(complex<double>(gko::custom_double::custom_to_native(a.real()),
145+
gko::custom_double::custom_to_native(a.imag())));
146+
return complex<gko::custom_double>(
147+
gko::custom_double::to_custom(result.real()),
148+
gko::custom_double::to_custom(result.imag()));
149+
}
150+
151+
152+
template <>
153+
GKO_ATTRIBUTES GKO_INLINE gko::custom_double abs<gko::custom_double>(
154+
const complex<gko::custom_double>& z)
155+
{
156+
auto result =
157+
abs(complex<double>(gko::custom_double::custom_to_native(z.real()),
158+
gko::custom_double::custom_to_native(z.imag())));
159+
return gko::custom_double::to_custom(result);
160+
}
161+
162+
128163
} // namespace thrust
129164
GKO_THRUST_NAEMSPACE_POSTFIX
130165

@@ -183,6 +218,33 @@ __device__ __forceinline__ bool is_finite(const thrust::complex<__half>& value)
183218
return is_finite(value.real()) && is_finite(value.imag());
184219
}
185220

221+
222+
__device__ __forceinline__ bool is_nan(const gko::custom_double& val)
223+
{
224+
return is_nan(gko::custom_double::custom_to_native(val));
225+
}
226+
227+
__device__ __forceinline__ bool is_nan(
228+
const thrust::complex<gko::custom_double>& val)
229+
{
230+
return is_nan(val.real()) || is_nan(val.imag());
231+
}
232+
233+
__device__ __forceinline__ gko::custom_double abs(const gko::custom_double& val)
234+
{
235+
return custom_double::to_custom(abs(custom_double::custom_to_native(val)));
236+
}
237+
238+
__device__ __forceinline__ gko::custom_double sqrt(
239+
const gko::custom_double& val)
240+
{
241+
return custom_double::to_custom(sqrt(custom_double::custom_to_native(val)));
242+
}
243+
244+
__device__ __forceinline__ bool is_finite(const gko::custom_double& value)
245+
{
246+
return is_finite(custom_double::custom_to_native(value));
247+
}
186248
#endif
187249

188250

common/cuda_hip/components/atomic.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -240,6 +240,18 @@ __forceinline__ __device__ thrust::complex<double> atomic_add(
240240
}
241241

242242

243+
__forceinline__ __device__ thrust::complex<gko::custom_double> atomic_add(
244+
thrust::complex<gko::custom_double>* __restrict__ address,
245+
thrust::complex<gko::custom_double> val)
246+
{
247+
auto addr = reinterpret_cast<gko::custom_double*>(address);
248+
// Separate to real part and imag part
249+
auto real = atomic_add(addr, val.real());
250+
auto imag = atomic_add(addr + 1, val.imag());
251+
return {real, imag};
252+
}
253+
254+
243255
} // namespace GKO_DEVICE_NAMESPACE
244256
} // namespace kernels
245257
} // namespace gko

0 commit comments

Comments
 (0)