Skip to content

Replace vek with glam. #180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/cuda_std/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA"
readme = "../../README.md"

[dependencies]
glam = { version = ">=0.22", default-features = false, features = ["libm", "cuda", "bytemuck"] }
vek = { version = "0.17.1", default-features = false, features = ["libm"] }
cuda_std_macros = { version = "0.2", path = "../cuda_std_macros" }
half = "2.4.1"
Expand Down
2 changes: 2 additions & 0 deletions crates/cuda_std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ mod float_ext;
pub use cuda_std_macros::*;
pub use float::GpuFloat;
pub use float_ext::*;
pub use glam;
pub use half;
#[deprecated(note = "The `vek` module is deprecated, use `glam` instead.")]
pub use vek;

pub use half::{bf16, f16};
Expand Down
32 changes: 16 additions & 16 deletions crates/cuda_std/src/rt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,23 @@ impl<'a> From<&'a GridSize> for GridSize {
other.clone()
}
}
impl From<vek::Vec2<u32>> for GridSize {
fn from(vec: vek::Vec2<u32>) -> Self {
impl From<glam::UVec2> for GridSize {
fn from(vec: glam::UVec2) -> Self {
GridSize::xy(vec.x, vec.y)
}
}
impl From<vek::Vec3<u32>> for GridSize {
fn from(vec: vek::Vec3<u32>) -> Self {
impl From<glam::UVec3> for GridSize {
fn from(vec: glam::UVec3) -> Self {
GridSize::xyz(vec.x, vec.y, vec.z)
}
}
impl From<vek::Vec2<usize>> for GridSize {
fn from(vec: vek::Vec2<usize>) -> Self {
impl From<glam::USizeVec2> for GridSize {
fn from(vec: glam::USizeVec2) -> Self {
GridSize::xy(vec.x as u32, vec.y as u32)
}
}
impl From<vek::Vec3<usize>> for GridSize {
fn from(vec: vek::Vec3<usize>) -> Self {
impl From<glam::USizeVec3> for GridSize {
fn from(vec: glam::USizeVec3) -> Self {
GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}
Expand Down Expand Up @@ -228,23 +228,23 @@ impl<'a> From<&'a BlockSize> for BlockSize {
other.clone()
}
}
impl From<vek::Vec2<u32>> for BlockSize {
fn from(vec: vek::Vec2<u32>) -> Self {
impl From<glam::UVec2> for BlockSize {
fn from(vec: glam::UVec2) -> Self {
BlockSize::xy(vec.x, vec.y)
}
}
impl From<vek::Vec3<u32>> for BlockSize {
fn from(vec: vek::Vec3<u32>) -> Self {
impl From<glam::UVec3> for BlockSize {
fn from(vec: glam::UVec3) -> Self {
BlockSize::xyz(vec.x, vec.y, vec.z)
}
}
impl From<vek::Vec2<usize>> for BlockSize {
fn from(vec: vek::Vec2<usize>) -> Self {
impl From<glam::USizeVec2> for BlockSize {
fn from(vec: glam::USizeVec2) -> Self {
BlockSize::xy(vec.x as u32, vec.y as u32)
}
}
impl From<vek::Vec3<usize>> for BlockSize {
fn from(vec: vek::Vec3<usize>) -> Self {
impl From<glam::USizeVec3> for BlockSize {
fn from(vec: glam::USizeVec3) -> Self {
BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}
34 changes: 17 additions & 17 deletions crates/cuda_std/src/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// TODO: write some docs about the terms used in this module.

use cuda_std_macros::gpu_only;
use vek::{Vec2, Vec3};
use glam::{UVec2, UVec3};

// Different calling conventions don't exist in nvptx, so we just use C as a placeholder.
extern "C" {
Expand Down Expand Up @@ -152,9 +152,9 @@ pub fn grid_dim_z() -> u32 {
/// Gets the 3d index of the thread currently executing the kernel.
#[gpu_only]
#[inline(always)]
pub fn thread_idx() -> Vec3<u32> {
pub fn thread_idx() -> UVec3 {
unsafe {
Vec3::new(
UVec3::new(
__nvvm_thread_idx_x(),
__nvvm_thread_idx_y(),
__nvvm_thread_idx_z(),
Expand All @@ -165,9 +165,9 @@ pub fn thread_idx() -> Vec3<u32> {
/// Gets the 3d index of the block that the thread currently executing the kernel is located in.
#[gpu_only]
#[inline(always)]
pub fn block_idx() -> Vec3<u32> {
pub fn block_idx() -> UVec3 {
unsafe {
Vec3::new(
UVec3::new(
__nvvm_block_idx_x(),
__nvvm_block_idx_y(),
__nvvm_block_idx_z(),
Expand All @@ -179,9 +179,9 @@ pub fn block_idx() -> Vec3<u32> {
/// how many threads exist in each thread block in every direction.
#[gpu_only]
#[inline(always)]
pub fn block_dim() -> Vec3<u32> {
pub fn block_dim() -> UVec3 {
unsafe {
Vec3::new(
UVec3::new(
__nvvm_block_dim_x(),
__nvvm_block_dim_y(),
__nvvm_block_dim_z(),
Expand All @@ -193,9 +193,9 @@ pub fn block_dim() -> Vec3<u32> {
/// how many thread blocks exist in each grid in every direction.
#[gpu_only]
#[inline(always)]
pub fn grid_dim() -> Vec3<u32> {
pub fn grid_dim() -> UVec3 {
unsafe {
Vec3::new(
UVec3::new(
__nvvm_grid_dim_x(),
__nvvm_grid_dim_y(),
__nvvm_grid_dim_z(),
Expand All @@ -206,7 +206,7 @@ pub fn grid_dim() -> Vec3<u32> {
/// Gets the overall thread index, accounting for 1d/2d/3d block/grid dimensions. This
/// value is most commonly used for indexing into data and this index is guaranteed to
/// be unique for every single thread executing this kernel no matter the launch configuration.
///
///
/// For very simple kernels it may be faster to use a simpler index calculation; however,
/// it will be unsound if the kernel launches in a 2d/3d configuration.
#[gpu_only]
Expand All @@ -218,10 +218,10 @@ pub fn index() -> u32 {
let block_dim = block_dim();
let thread_idx = thread_idx();

let block_id = block_idx.x + block_idx.y * grid_dim.x
let block_id = block_idx.x + block_idx.y * grid_dim.x
+ grid_dim.x * grid_dim.y * block_idx.z;

block_id * block_dim.product()
block_id * block_dim.element_product()
+ (thread_idx.z * (block_dim.x * block_dim.y))
+ (thread_idx.y * block_dim.x) + thread_idx.x
}
Expand All @@ -232,26 +232,26 @@ pub fn index_1d() -> u32 {
}

#[inline(always)]
pub fn index_2d() -> Vec2<u32> {
pub fn index_2d() -> UVec2 {
let i = thread_idx_x() + block_idx_x() * block_dim_x();
let j = thread_idx_y() + block_idx_y() * block_dim_y();
Vec2::new(i, j)
UVec2::new(i, j)
}

#[inline(always)]
pub fn index_3d() -> Vec3<u32> {
pub fn index_3d() -> UVec3 {
let i = thread_idx_x() + block_idx_x() * block_dim_x();
let j = thread_idx_y() + block_idx_y() * block_dim_y();
let k = thread_idx_z() + block_idx_z() * block_dim_z();
Vec3::new(i, j, k)
UVec3::new(i, j, k)
}

/// Whether this is the first thread (not the first thread to be executing). This function is guaranteed
/// to only return true in a single thread that is invoking it. This is useful for only doing something
/// once.
#[inline(always)]
pub fn first() -> bool {
block_idx() == Vec3::zero() && thread_idx() == Vec3::zero()
block_idx() == UVec3::ZERO && thread_idx() == UVec3::ZERO
}

/// Gets the number of threads inside of a warp. Currently 32 threads on every GPU architecture.
Expand Down
1 change: 1 addition & 0 deletions crates/cust/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Notable changes to this project will be documented in this file.

## Unreleased

- `cuda_std::vek` is now deprecated. Use `cuda_std::glam`.
- Add `memory::memcpy_dtoh` to allow copying from device to host.
- `DeviceSlice` is represented as a slice again, but as `[()]` instead of `[T]`.
- Reimplemented `Index` and `IndexMut` for `DeviceSlice` and removed `DeviceSlice::index`.
Expand Down
2 changes: 1 addition & 1 deletion crates/cust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cust_core = { path = "../cust_core", version = "0.1.0"}
cust_raw = { path = "../cust_raw", default-features = false, features = ["driver"] }
bitflags = "2.8"
cust_derive = { path = "../cust_derive", version = "0.2" }
glam = { version = "0.29.2", features=["cuda"], optional = true }
glam = { version = "0.30", features=["cuda"], optional = true }
mint = { version = "^0.5", optional = true }
num-complex = { version = "0.4.6", optional = true }
vek = { version = "0.17.1", optional = true, default-features = false }
Expand Down
50 changes: 50 additions & 0 deletions crates/cust/src/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ impl From<vek::Vec3<usize>> for GridSize {
}
}

/// Converts a `glam::UVec2` into a `GridSize` by passing its `x` and `y`
/// components to `GridSize::xy`. Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::UVec2> for GridSize {
    fn from(vec: glam::UVec2) -> Self {
        let (x, y) = (vec.x, vec.y);
        Self::xy(x, y)
    }
}
/// Converts a `glam::UVec3` into a `GridSize` by passing its `x`, `y` and `z`
/// components to `GridSize::xyz`. Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::UVec3> for GridSize {
    fn from(vec: glam::UVec3) -> Self {
        let (x, y, z) = (vec.x, vec.y, vec.z);
        Self::xyz(x, y, z)
    }
}
/// Converts a `glam::USizeVec2` into a `GridSize` by casting its `x` and `y`
/// components to `u32` and passing them to `GridSize::xy`.
/// Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::USizeVec2> for GridSize {
    fn from(vec: glam::USizeVec2) -> Self {
        // NOTE: `as u32` silently truncates any component larger than `u32::MAX`.
        let (x, y) = (vec.x as u32, vec.y as u32);
        Self::xy(x, y)
    }
}
/// Converts a `glam::USizeVec3` into a `GridSize` by casting its `x`, `y` and `z`
/// components to `u32` and passing them to `GridSize::xyz`.
/// Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::USizeVec3> for GridSize {
    fn from(vec: glam::USizeVec3) -> Self {
        // NOTE: `as u32` silently truncates any component larger than `u32::MAX`.
        let (x, y, z) = (vec.x as u32, vec.y as u32, vec.z as u32);
        Self::xyz(x, y, z)
    }
}

/// Dimensions of a thread block, or the number of threads in a block.
///
/// Each component of a `BlockSize` must be at least 1. The maximum size depends on your device's
Expand Down Expand Up @@ -168,6 +193,31 @@ impl From<vek::Vec3<usize>> for BlockSize {
}
}

/// Converts a `glam::UVec2` into a `BlockSize` by passing its `x` and `y`
/// components to `BlockSize::xy`. Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::UVec2> for BlockSize {
    fn from(vec: glam::UVec2) -> Self {
        let (x, y) = (vec.x, vec.y);
        Self::xy(x, y)
    }
}
/// Converts a `glam::UVec3` into a `BlockSize` by passing its `x`, `y` and `z`
/// components to `BlockSize::xyz`. Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::UVec3> for BlockSize {
    fn from(vec: glam::UVec3) -> Self {
        let (x, y, z) = (vec.x, vec.y, vec.z);
        Self::xyz(x, y, z)
    }
}
/// Converts a `glam::USizeVec2` into a `BlockSize` by casting its `x` and `y`
/// components to `u32` and passing them to `BlockSize::xy`.
/// Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::USizeVec2> for BlockSize {
    fn from(vec: glam::USizeVec2) -> Self {
        // NOTE: `as u32` silently truncates any component larger than `u32::MAX`.
        let (x, y) = (vec.x as u32, vec.y as u32);
        Self::xy(x, y)
    }
}
/// Converts a `glam::USizeVec3` into a `BlockSize` by casting its `x`, `y` and `z`
/// components to `u32` and passing them to `BlockSize::xyz`.
/// Only compiled when the `glam` feature is enabled.
#[cfg(feature = "glam")]
impl From<glam::USizeVec3> for BlockSize {
    fn from(vec: glam::USizeVec3) -> Self {
        // NOTE: `as u32` silently truncates any component larger than `u32::MAX`.
        let (x, y, z) = (vec.x as u32, vec.y as u32, vec.z as u32);
        Self::xyz(x, y, z)
    }
}

/// All supported function attributes for [Function::get_attribute](struct.Function.html#method.get_attribute)
#[repr(u32)]
#[non_exhaustive]
Expand Down
2 changes: 1 addition & 1 deletion crates/cust_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ readme = "../../README.md"

[dependencies]
vek = { version = "0.17.1", default-features=false, features=["libm"], optional = true }
glam = { version = "0.29.2", features=["cuda", "libm"], default-features=false, optional=true }
glam = { version = "0.30", features=["cuda", "libm"], default-features=false, optional=true }
mint = { version = "^0.5", optional = true }
half = { version = "2.4.1", optional = true }
num-complex = { version = "0.4.6", optional = true }
Expand Down
21 changes: 18 additions & 3 deletions crates/cust_core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#![no_std]

pub use _hidden::*;
pub use cust_derive::DeviceCopyCore as DeviceCopy;

Expand Down Expand Up @@ -143,6 +141,7 @@ pub mod _hidden {
{
}

#[allow(unused_macros)]
macro_rules! impl_device_copy_generic {
($($($strukt:ident)::+),* $(,)?) => {
$(
Expand All @@ -151,6 +150,7 @@ pub mod _hidden {
}
}

#[allow(unused_macros)]
macro_rules! impl_device_copy {
($($strukt:ty),* $(,)?) => {
$(
Expand All @@ -172,7 +172,22 @@ pub mod _hidden {

#[cfg(feature = "glam")]
impl_device_copy! {
glam::Vec2, glam::Vec3, glam::Vec4, glam::IVec2, glam::IVec3, glam::IVec4,
glam::BVec2, glam::BVec3, glam::BVec3A, glam::BVec4, glam::BVec4A,
glam::U8Vec2, glam::U8Vec3, glam::U8Vec4,
glam::I8Vec2, glam::I8Vec3, glam::I8Vec4,
glam::U16Vec2, glam::U16Vec3, glam::U16Vec4,
glam::I16Vec2, glam::I16Vec3, glam::I16Vec4,
glam::UVec2, glam::UVec3, glam::UVec4,
glam::IVec2, glam::IVec3, glam::IVec4,
glam::U64Vec2, glam::U64Vec3, glam::U64Vec4,
glam::I64Vec2, glam::I64Vec3, glam::I64Vec4,
glam::USizeVec2, glam::USizeVec3, glam::USizeVec4,
glam::Vec2, glam::Vec3, glam::Vec3A, glam::Vec4,
glam::DVec2, glam::DVec3, glam::DVec4,
glam::Mat2, glam::Mat3, glam::Mat3A, glam::Mat4,
glam::DMat2, glam::DMat3, glam::DMat4,
glam::Quat, glam::DQuat,
glam::Affine2, glam::Affine3A,
}

#[cfg(feature = "mint")]
Expand Down
1 change: 0 additions & 1 deletion crates/gpu_rand/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
#![allow(clippy::unreadable_literal)]
#![cfg_attr(target_os = "cuda", no_std)]
#![feature(doc_cfg)]

pub mod xoroshiro;
Expand Down
2 changes: 1 addition & 1 deletion crates/optix/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cust = { version = "0.3", path = "../cust", features=["impl_mint"] }
cust_raw = { path = "../cust_raw", features=["driver"] }
cfg-if = "1.0.0"
bitflags = "2.9.0"
glam = { version = "0.29", features=["cuda", "libm"], default-features=false, optional=true }
glam = { version = "0.30", features=["cuda", "libm"], default-features=false, optional=true }
half = { version = "2.4.1", optional = true }
memoffset = "0.9.1"
mint = "0.5.9"
Expand Down
1 change: 1 addition & 0 deletions crates/optix/examples/ex03_window/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ anyhow = "1.0.44"
glfw = "0.42.0"
gl = "0.14.0"
num-traits = "0.2.14"
glam = { version = "0.30", features = ["bytemuck"] }
4 changes: 2 additions & 2 deletions crates/optix/examples/ex03_window/src/gl_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use gl;
use gl::types::{GLchar, GLenum, GLint, GLsizeiptr, GLuint, GLvoid};
use std::ffi::{CStr, CString};

use crate::vector::*;
use glam::Vec4;

pub struct Shader {
id: GLuint,
Expand Down Expand Up @@ -516,7 +516,7 @@ impl FullscreenQuad {
self.vertex_array.unbind();
}

pub fn update_texture(&self, data: &[V4f32]) {
pub fn update_texture(&self, data: &[Vec4]) {
unsafe {
gl::BindTexture(gl::TEXTURE_2D, self.texture_id);
gl::TexSubImage2D(
Expand Down
7 changes: 3 additions & 4 deletions crates/optix/examples/ex03_window/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
mod renderer;
use renderer::Renderer;

mod vector;
pub use vector::*;
use glam::{IVec2, Vec4};
mod gl_util;
use gl_util::FullscreenQuad;
use glfw::{Action, Context, Key};
Expand Down Expand Up @@ -42,7 +41,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {

let mut fsq = FullscreenQuad::new(width, height).unwrap();

let mut image_data = vec![v4f32(0.0, 0.0, 0.0, 0.0); (width * height) as usize];
let mut image_data = vec![Vec4::new(0.0, 0.0, 0.0, 0.0); (width * height) as usize];

unsafe {
gl::Viewport(0, 0, fb_width, fb_height);
Expand All @@ -62,7 +61,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
renderer.resize(w, h)?;
width = w;
height = h;
image_data.resize((width * height) as usize, v4f32(0.0, 0.0, 0.0, 0.0));
image_data.resize((width * height) as usize, Vec4::new(0.0, 0.0, 0.0, 0.0));
}

renderer.render()?;
Expand Down
Loading
Loading