From 644d2ede47d93c625d4d35090d39a84ffa2c3fdb Mon Sep 17 00:00:00 2001 From: Christian Legnitto Date: Mon, 31 Mar 2025 22:29:46 -0400 Subject: [PATCH 1/3] Replace `vek` with glam. This fixes https://github.com/Rust-GPU/Rust-CUDA/issues/179. --- crates/cuda_std/Cargo.toml | 1 + crates/cuda_std/src/lib.rs | 2 ++ crates/cuda_std/src/rt/mod.rs | 32 +++++++++---------- crates/cuda_std/src/thread.rs | 20 ++++++------ crates/cust/CHANGELOG.md | 1 + crates/optix_device/Cargo.toml | 9 ++++-- crates/optix_device/src/hit.rs | 2 +- crates/optix_device/src/lib.rs | 3 +- crates/optix_device/src/ray.rs | 2 +- crates/optix_device/src/sys.rs | 2 +- examples/cuda/path_tracer/Cargo.toml | 4 +-- examples/cuda/path_tracer/kernels/src/lib.rs | 9 +++--- .../cuda/path_tracer/kernels/src/render.rs | 2 +- .../path_tracer/kernels/src/render_kernels.rs | 7 ++-- examples/cuda/path_tracer/src/common.rs | 10 +++--- examples/cuda/path_tracer/src/cpu/mod.rs | 8 ++--- examples/cuda/path_tracer/src/cuda/data.rs | 2 +- examples/cuda/path_tracer/src/main.rs | 2 +- 18 files changed, 63 insertions(+), 55 deletions(-) diff --git a/crates/cuda_std/Cargo.toml b/crates/cuda_std/Cargo.toml index 4e120f53..209929ce 100644 --- a/crates/cuda_std/Cargo.toml +++ b/crates/cuda_std/Cargo.toml @@ -8,6 +8,7 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" [dependencies] +glam = { version = ">=0.22", default-features = false, features = ["libm", "cuda", "bytemuck"] } vek = { version = "0.17.1", default-features = false, features = ["libm"] } cuda_std_macros = { version = "0.2", path = "../cuda_std_macros" } half = "2.4.1" diff --git a/crates/cuda_std/src/lib.rs b/crates/cuda_std/src/lib.rs index 357922b5..752c07f1 100644 --- a/crates/cuda_std/src/lib.rs +++ b/crates/cuda_std/src/lib.rs @@ -49,7 +49,9 @@ mod float_ext; pub use cuda_std_macros::*; pub use float::GpuFloat; pub use float_ext::*; +pub use glam; pub use half; +#[deprecated(note = "The `vek` module is deprecated, use `glam` instead.")] pub use vek; pub use half::{bf16, f16}; diff --git a/crates/cuda_std/src/rt/mod.rs b/crates/cuda_std/src/rt/mod.rs index 36ce621d..ed6440ab 100644 --- a/crates/cuda_std/src/rt/mod.rs +++ b/crates/cuda_std/src/rt/mod.rs @@ -152,23 +152,23 @@ impl<'a> From<&'a GridSize> for GridSize { other.clone() } } -impl From> for GridSize { - fn from(vec: vek::Vec2) -> Self { +impl From for GridSize { + fn from(vec: glam::UVec2) -> Self { GridSize::xy(vec.x, vec.y) } } -impl From> for GridSize { - fn from(vec: vek::Vec3) -> Self { +impl From for GridSize { + fn from(vec: glam::UVec3) -> Self { GridSize::xyz(vec.x, vec.y, vec.z) } } -impl From> for GridSize { - fn from(vec: vek::Vec2) -> Self { +impl From for GridSize { + fn from(vec: glam::USizeVec2) -> Self { GridSize::xy(vec.x as u32, vec.y as u32) } } -impl From> for GridSize { - fn from(vec: vek::Vec3) -> Self { +impl From for GridSize { + fn from(vec: glam::USizeVec3) -> Self { GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) } } @@ -228,23 +228,23 @@ impl<'a> From<&'a BlockSize> for BlockSize { other.clone() } } -impl From> for BlockSize { - fn from(vec: vek::Vec2) -> Self { +impl From for BlockSize { + fn from(vec: glam::UVec2) -> Self { BlockSize::xy(vec.x, vec.y) } } -impl From> for BlockSize { - fn from(vec: vek::Vec3) -> Self { +impl From for BlockSize { + fn from(vec: glam::UVec3) -> Self { BlockSize::xyz(vec.x, vec.y, vec.z) } } -impl From> for BlockSize { - fn from(vec: vek::Vec2) -> Self { +impl From for BlockSize { + fn from(vec: glam::USizeVec2) -> Self { BlockSize::xy(vec.x as u32, vec.y as u32) } } -impl From> for BlockSize { - fn from(vec: vek::Vec3) -> Self { +impl From for BlockSize { + fn from(vec: glam::USizeVec3) -> Self { BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) } } diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 3a4d3432..59c0cec5 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -19,7 +19,7 @@ // TODO: write some docs about the terms used in this module. use cuda_std_macros::gpu_only; -use vek::{Vec2, Vec3}; +use glam::{UVec2, UVec3}; // different calling conventions dont exist in nvptx, so we just use C as a placeholder. extern "C" { @@ -152,7 +152,7 @@ pub fn grid_dim_z() -> u32 { /// Gets the 3d index of the thread currently executing the kernel. #[gpu_only] #[inline(always)] -pub fn thread_idx() -> Vec3 { +pub fn thread_idx() -> UVec3 { unsafe { Vec3::new( __nvvm_thread_idx_x(), @@ -165,7 +165,7 @@ pub fn thread_idx() -> Vec3 { /// Gets the 3d index of the block that the thread currently executing the kernel is located in. #[gpu_only] #[inline(always)] -pub fn block_idx() -> Vec3 { +pub fn block_idx() -> UVec3 { unsafe { Vec3::new( __nvvm_block_idx_x(), @@ -179,7 +179,7 @@ pub fn block_idx() -> Vec3 { /// how many threads exist in each thread block in every direction. #[gpu_only] #[inline(always)] -pub fn block_dim() -> Vec3 { +pub fn block_dim() -> UVec3 { unsafe { Vec3::new( __nvvm_block_dim_x(), @@ -193,7 +193,7 @@ pub fn block_dim() -> Vec3 { /// how many thread blocks exist in each grid in every direction. #[gpu_only] #[inline(always)] -pub fn grid_dim() -> Vec3 { +pub fn grid_dim() -> UVec3 { unsafe { Vec3::new( __nvvm_grid_dim_x(), @@ -232,18 +232,18 @@ pub fn index_1d() -> u32 { } #[inline(always)] -pub fn index_2d() -> Vec2 { +pub fn index_2d() -> UVec2 { let i = thread_idx_x() + block_idx_x() * block_dim_x(); let j = thread_idx_y() + block_idx_y() * block_dim_y(); - Vec2::new(i, j) + UVec2::new(i, j) } #[inline(always)] -pub fn index_3d() -> Vec3 { +pub fn index_3d() -> UVec3 { let i = thread_idx_x() + block_idx_x() * block_dim_x(); let j = thread_idx_y() + block_idx_y() * block_dim_y(); let k = thread_idx_z() + block_idx_z() * block_dim_z(); - Vec3::new(i, j, k) + UVec3::new(i, j, k) } /// Whether this is the first thread (not the first thread to be executing). This function is guaranteed @@ -251,7 +251,7 @@ pub fn index_3d() -> Vec3 { /// once. #[inline(always)] pub fn first() -> bool { - block_idx() == Vec3::zero() && thread_idx() == Vec3::zero() + block_idx() == UVec3::ZERO && thread_idx() == UVec3::ZERO } /// Gets the number of threads inside of a warp. Currently 32 threads on every GPU architecture. diff --git a/crates/cust/CHANGELOG.md b/crates/cust/CHANGELOG.md index 4dac02dc..904995ab 100644 --- a/crates/cust/CHANGELOG.md +++ b/crates/cust/CHANGELOG.md @@ -4,6 +4,7 @@ Notable changes to this project will be documented in this file. ## Unreleased +- `cuda_std::vek` is now deprecated. Use `cuda_std::glam`. - Add `memory::memcpy_dtoh` to allow copying from device to host. - `DeviceSlice` is represented as a slice again, but as `[()]` instead of `[T]`. - Reimplemented `Index` and `IndexMut` for `DeviceSlice` and removed `DeviceSlice::index`. diff --git a/crates/optix_device/Cargo.toml b/crates/optix_device/Cargo.toml index 2252eb41..f1f6575f 100644 --- a/crates/optix_device/Cargo.toml +++ b/crates/optix_device/Cargo.toml @@ -2,12 +2,17 @@ name = "optix_device" version = "0.1.0" edition = "2021" -authors = ["Anders Langlands ", "Riccardo D'Ambrosio "] +authors = [ + "Anders Langlands ", + "Riccardo D'Ambrosio " +] [dependencies] bitflags = "2.8" cuda_std = { version = "0.2", path = "../cuda_std" } -glam = { version = "0.29", features=["cuda", "libm"], default-features=false } paste = "1.0.15" seq-macro = "0.3.5" cust_core = { version = "0.1", path = "../cust_core" } + +[target.'cfg(not(target_os = "cuda"))'.dependencies] +glam = { version = "0.29", features = ["cuda"], default-features = false } diff --git a/crates/optix_device/src/hit.rs b/crates/optix_device/src/hit.rs index a907f004..27abe78e 100644 --- a/crates/optix_device/src/hit.rs +++ b/crates/optix_device/src/hit.rs @@ -1,6 +1,6 @@ #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; /// The type of primitive that a ray hit. #[repr(u32)] diff --git a/crates/optix_device/src/lib.rs b/crates/optix_device/src/lib.rs index 76c0539f..c13f28ad 100644 --- a/crates/optix_device/src/lib.rs +++ b/crates/optix_device/src/lib.rs @@ -14,8 +14,7 @@ pub mod trace; pub mod transform; pub mod util; -use cuda_std::*; -pub use glam; +use cuda_std::{glam, *}; use glam::UVec3; pub use misc::*; diff --git a/crates/optix_device/src/ray.rs b/crates/optix_device/src/ray.rs index 48939270..258ca99d 100644 --- a/crates/optix_device/src/ray.rs +++ b/crates/optix_device/src/ray.rs @@ -1,7 +1,7 @@ use crate::trace::*; #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; /// Returns the ray origin that was passed into [`trace`] in world-space. diff --git a/crates/optix_device/src/sys.rs b/crates/optix_device/src/sys.rs index b8c72713..946a0293 100644 --- a/crates/optix_device/src/sys.rs +++ b/crates/optix_device/src/sys.rs @@ -3,7 +3,7 @@ use crate::trace::{RayFlags, TraversableHandle}; #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; use paste::paste; diff --git a/examples/cuda/path_tracer/Cargo.toml b/examples/cuda/path_tracer/Cargo.toml index 46619e01..fc9680b4 100644 --- a/examples/cuda/path_tracer/Cargo.toml +++ b/examples/cuda/path_tracer/Cargo.toml @@ -4,10 +4,10 @@ version = "0.1.0" edition = "2018" [dependencies] -vek = { version = "0.17.1", features = ["bytemuck", "mint"] } +glam = { version = "0.30.1", features = ["bytemuck", "cuda"] } bytemuck = { version = "1.21", features = ["derive"] } cust = { version = "0.3", path = "../../../crates/cust", features = [ - "impl_vek", + "impl_glam", ] } image = "0.25.5" path-tracer-kernels = { path = "kernels" } diff --git a/examples/cuda/path_tracer/kernels/src/lib.rs b/examples/cuda/path_tracer/kernels/src/lib.rs index 51614f77..c3a0e578 100644 --- a/examples/cuda/path_tracer/kernels/src/lib.rs +++ b/examples/cuda/path_tracer/kernels/src/lib.rs @@ -11,20 +11,19 @@ pub mod render_kernels; pub mod scene; pub mod sphere; -pub use cuda_std::vek; +pub use cuda_std::glam; use cust_core::DeviceCopy; use enum_dispatch::enum_dispatch; use hittable::{HitRecord, Hittable}; use sphere::Sphere; -pub type Vec3 = vek::Vec3; -pub type Point = vek::Vec3; -pub type Vec2 = vek::Vec2; +use glam::{USizeVec2, Vec2, Vec3}; +pub type Point = Vec3; #[derive(Default, Clone, Copy, DeviceCopy)] #[repr(C)] pub struct Viewport { - pub bounds: vek::Vec2, + pub bounds: USizeVec2, pub lower_left: Vec3, pub horizontal: Vec3, pub vertical: Vec3, diff --git a/examples/cuda/path_tracer/kernels/src/render.rs b/examples/cuda/path_tracer/kernels/src/render.rs index 9767b4fd..c4fa7303 100644 --- a/examples/cuda/path_tracer/kernels/src/render.rs +++ b/examples/cuda/path_tracer/kernels/src/render.rs @@ -8,7 +8,7 @@ pub fn color(ray: Ray) -> Vec3 { (1.0 - t) * Vec3::one() + t * Vec3::new(0.5, 0.7, 1.0) } -pub fn generate_ray(idx: vek::Vec2, view: &Viewport, offset: Vec2) -> Ray { +pub fn generate_ray(idx: UVec2, view: &Viewport, offset: Vec2) -> Ray { let uv = (idx.numcast::().unwrap() + offset) / view.bounds.numcast().unwrap(); Ray { origin: view.origin, diff --git a/examples/cuda/path_tracer/kernels/src/render_kernels.rs b/examples/cuda/path_tracer/kernels/src/render_kernels.rs index ef7d8d96..a036a884 100644 --- a/examples/cuda/path_tracer/kernels/src/render_kernels.rs +++ b/examples/cuda/path_tracer/kernels/src/render_kernels.rs @@ -1,5 +1,6 @@ use crate::{render::*, scene::Scene, *}; -use cuda_std::{vek::Clamp, *}; +use cuda_std::*; +use glam::{U8Vec3, Vec2, Vec3}; use gpu_rand::{DefaultRand, GpuRand}; #[kernel] @@ -38,7 +39,7 @@ pub unsafe fn scale_buffer(fb: *const Vec3, out: *mut Vec3, samples: u32, view: /// Postprocesses a (scaled) buffer into a final u8 buffer. #[kernel] -pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3, view: Viewport) { +pub unsafe fn postprocess(fb: *const Vec3, out: *mut U8Vec3, view: Viewport) { let idx_2d = thread::index_2d(); if idx_2d.x >= view.bounds.x as u32 || idx_2d.y >= view.bounds.y as u32 { return; @@ -50,7 +51,7 @@ pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3, view: Viewpo let gamma_corrected = original.sqrt(); *out = (gamma_corrected * 255.0) - .clamped(Vec3::zero(), Vec3::broadcast(255.0)) + .clamp(Vec3::zero(), Vec3::broadcast(255.0)) .numcast() .unwrap(); } diff --git a/examples/cuda/path_tracer/src/common.rs b/examples/cuda/path_tracer/src/common.rs index 969ff946..36ab3d0e 100644 --- a/examples/cuda/path_tracer/src/common.rs +++ b/examples/cuda/path_tracer/src/common.rs @@ -1,14 +1,14 @@ +use glam::{Vec2, Vec3}; use glium::glutin::event::{ ElementState, Event, MouseButton, MouseScrollDelta, VirtualKeyCode, WindowEvent, }; use path_tracer_kernels::Viewport; -use vek::{Vec2, Vec3}; #[derive(Debug, Clone, Copy, PartialEq)] pub struct Camera { - pub origin: Vec3, - pub lookat: Vec3, - pub vup: Vec3, + pub origin: Vec3, + pub lookat: Vec3, + pub vup: Vec3, pub fov: f32, pub aspect_ratio: f32, } @@ -43,7 +43,7 @@ pub struct CameraController { } impl CameraController { - pub fn new(dimensions: Vec2) -> Self { + pub fn new(dimensions: USizeVec2) -> Self { CameraController { sensitivity: 0.1, last_mouse_pos: dimensions.numcast().unwrap() / 2.0, diff --git a/examples/cuda/path_tracer/src/cpu/mod.rs b/examples/cuda/path_tracer/src/cpu/mod.rs index 3fad1581..3a6117e5 100644 --- a/examples/cuda/path_tracer/src/cpu/mod.rs +++ b/examples/cuda/path_tracer/src/cpu/mod.rs @@ -1,5 +1,6 @@ use std::time::Duration; +use glam::{Clamp, Vec2, Vec3}; use gpu_rand::{DefaultRand, GpuRand}; use imgui::Ui; use path_tracer_kernels::{ @@ -7,13 +8,12 @@ use path_tracer_kernels::{ }; use rayon::prelude::*; use sysinfo::System; -use vek::{Clamp, Vec2, Vec3}; use crate::{common::Camera, cuda::SEED}; pub struct CpuRenderer { // this is basically the cuda buffers but not gpu buffers. - accumulated_buffer: Vec>, + accumulated_buffer: Vec, out_buffer: Vec>, viewport: Viewport, @@ -23,7 +23,7 @@ pub struct CpuRenderer { } impl CpuRenderer { - pub fn new(dimensions: Vec2, camera: &Camera, scene: &Scene) -> Self { + pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Self { let accumulated_buffer = vec![Vec3::zero(); dimensions.product()]; let out_buffer = vec![Vec3::zero(); dimensions.product()]; @@ -67,7 +67,7 @@ impl CpuRenderer { new_camera.as_viewport(&mut self.viewport); } - pub fn resize(&mut self, dimensions: Vec2) { + pub fn resize(&mut self, dimensions: USizeVec2) { self.accumulated_buffer .resize(dimensions.product(), Vec3::zero()); self.out_buffer.resize(dimensions.product(), Vec3::zero()); diff --git a/examples/cuda/path_tracer/src/cuda/data.rs b/examples/cuda/path_tracer/src/cuda/data.rs index d7f2d224..8b8eb5c3 100644 --- a/examples/cuda/path_tracer/src/cuda/data.rs +++ b/examples/cuda/path_tracer/src/cuda/data.rs @@ -5,9 +5,9 @@ use cust::{ memory::{DeviceBuffer, DeviceCopy, UnifiedBuffer}, util::SliceExt, }; +use glam::{Vec2, Vec3}; use gpu_rand::DefaultRand; use path_tracer_kernels::{material::MaterialKind, scene::Scene, Object, Viewport}; -use vek::{Vec2, Vec3}; use super::SEED; diff --git a/examples/cuda/path_tracer/src/main.rs b/examples/cuda/path_tracer/src/main.rs index d4ff4470..5d847774 100644 --- a/examples/cuda/path_tracer/src/main.rs +++ b/examples/cuda/path_tracer/src/main.rs @@ -6,6 +6,7 @@ pub mod renderer; pub mod viewer; use common::Camera; +use glam::Vec3; use path_tracer_kernels::{ material::{DielectricMaterial, DiffuseMaterial, MaterialKind, MetallicMaterial}, scene::Scene, @@ -13,7 +14,6 @@ use path_tracer_kernels::{ Object, }; use std::error::Error; -use vek::Vec3; pub const WIDTH: u32 = 1920; pub const HEIGHT: u32 = 1080; From 00be05b0dd45bceaeca23fb07655adebc96967a1 Mon Sep 17 00:00:00 2001 From: Jorge Ortega Date: Wed, 23 Apr 2025 23:09:26 -0700 Subject: [PATCH 2/3] Continue replacing `vek` with `glam` --- crates/cuda_std/src/thread.rs | 14 +++--- crates/cust/Cargo.toml | 2 +- crates/cust/src/function.rs | 50 +++++++++++++++++++ crates/cust_core/Cargo.toml | 2 +- crates/cust_core/src/lib.rs | 21 ++++++-- crates/gpu_rand/src/lib.rs | 1 - crates/optix/Cargo.toml | 2 +- crates/optix/examples/ex04_mesh/Cargo.toml | 2 +- crates/optix_device/Cargo.toml | 3 -- crates/optix_device/src/lib.rs | 3 +- crates/optix_device/src/trace.rs | 2 +- crates/optix_device/src/transform.rs | 2 +- crates/optix_device/src/util.rs | 2 +- examples/cuda/path_tracer/Cargo.toml | 2 +- examples/cuda/path_tracer/kernels/Cargo.toml | 4 +- examples/cuda/path_tracer/kernels/src/lib.rs | 3 +- .../cuda/path_tracer/kernels/src/material.rs | 19 ++++--- examples/cuda/path_tracer/kernels/src/math.rs | 6 +-- .../cuda/path_tracer/kernels/src/optix.rs | 20 ++++---- .../cuda/path_tracer/kernels/src/render.rs | 7 +-- .../path_tracer/kernels/src/render_kernels.rs | 7 ++- .../cuda/path_tracer/kernels/src/scene.rs | 6 +-- examples/cuda/path_tracer/src/common.rs | 16 +++--- examples/cuda/path_tracer/src/cpu/mod.rs | 28 +++++------ examples/cuda/path_tracer/src/cuda/data.rs | 24 ++++----- examples/cuda/path_tracer/src/cuda/mod.rs | 18 +++---- examples/cuda/path_tracer/src/main.rs | 2 +- examples/cuda/path_tracer/src/optix/mod.rs | 2 +- examples/cuda/path_tracer/src/renderer.rs | 6 +-- examples/cuda/path_tracer/src/viewer.rs | 14 ++++-- 30 files changed, 182 insertions(+), 108 deletions(-) diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 59c0cec5..3a447c95 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -154,7 +154,7 @@ pub fn grid_dim_z() -> u32 { #[inline(always)] pub fn thread_idx() -> UVec3 { unsafe { - Vec3::new( + UVec3::new( __nvvm_thread_idx_x(), __nvvm_thread_idx_y(), __nvvm_thread_idx_z(), @@ -167,7 +167,7 @@ pub fn thread_idx() -> UVec3 { #[inline(always)] pub fn block_idx() -> UVec3 { unsafe { - Vec3::new( + UVec3::new( __nvvm_block_idx_x(), __nvvm_block_idx_y(), __nvvm_block_idx_z(), @@ -181,7 +181,7 @@ pub fn block_idx() -> UVec3 { #[inline(always)] pub fn block_dim() -> UVec3 { unsafe { - Vec3::new( + UVec3::new( __nvvm_block_dim_x(), __nvvm_block_dim_y(), __nvvm_block_dim_z(), @@ -195,7 +195,7 @@ pub fn block_dim() -> UVec3 { #[inline(always)] pub fn grid_dim() -> UVec3 { unsafe { - Vec3::new( + UVec3::new( __nvvm_grid_dim_x(), __nvvm_grid_dim_y(), __nvvm_grid_dim_z(), @@ -206,7 +206,7 @@ pub fn grid_dim() -> UVec3 { /// Gets the overall thread index, accounting for 1d/2d/3d block/grid dimensions. This /// value is most commonly used for indexing into data and this index is guaranteed to /// be unique for every single thread executing this kernel no matter the launch configuration. -/// +/// /// For very simple kernels it may be faster to use a more simple index calculation, however, /// it will be unsound if the kernel launches in a 2d/3d configuration. #[gpu_only] @@ -218,10 +218,10 @@ pub fn index() -> u32 { let block_dim = block_dim(); let thread_idx = thread_idx(); - let block_id = block_idx.x + block_idx.y * grid_dim.x + let block_id = block_idx.x + block_idx.y * grid_dim.x + grid_dim.x * grid_dim.y * block_idx.z; - block_id * block_dim.product() + block_id * block_dim.element_product() + (thread_idx.z * (block_dim.x * block_dim.y)) + (thread_idx.y * block_dim.x) + thread_idx.x } diff --git a/crates/cust/Cargo.toml b/crates/cust/Cargo.toml index 5dd045e9..8882dae3 100644 --- a/crates/cust/Cargo.toml +++ b/crates/cust/Cargo.toml @@ -17,7 +17,7 @@ cust_core = { path = "../cust_core", version = "0.1.0"} cust_raw = { path = "../cust_raw", default-features = false, features = ["driver"] } bitflags = "2.8" cust_derive = { path = "../cust_derive", version = "0.2" } -glam = { version = "0.29.2", features=["cuda"], optional = true } +glam = { version = "0.30", features=["cuda"], optional = true } mint = { version = "^0.5", optional = true } num-complex = { version = "0.4.6", optional = true } vek = { version = "0.17.1", optional = true, default-features = false } diff --git a/crates/cust/src/function.rs b/crates/cust/src/function.rs index a2dffae9..b01e889e 100644 --- a/crates/cust/src/function.rs +++ b/crates/cust/src/function.rs @@ -88,6 +88,31 @@ impl From> for GridSize { } } +#[cfg(feature = "glam")] +impl From for GridSize { + fn from(vec: glam::UVec2) -> Self { + GridSize::xy(vec.x, vec.y) + } +} +#[cfg(feature = "glam")] +impl From for GridSize { + fn from(vec: glam::UVec3) -> Self { + GridSize::xyz(vec.x, vec.y, vec.z) + } +} +#[cfg(feature = "glam")] +impl From for GridSize { + fn from(vec: glam::USizeVec2) -> Self { + GridSize::xy(vec.x as u32, vec.y as u32) + } +} +#[cfg(feature = "glam")] +impl From for GridSize { + fn from(vec: glam::USizeVec3) -> Self { + GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) + } +} + /// Dimensions of a thread block, or the number of threads in a block. /// /// Each component of a `BlockSize` must be at least 1. The maximum size depends on your device's @@ -168,6 +193,31 @@ impl From> for BlockSize { } } +#[cfg(feature = "glam")] +impl From for BlockSize { + fn from(vec: glam::UVec2) -> Self { + BlockSize::xy(vec.x, vec.y) + } +} +#[cfg(feature = "glam")] +impl From for BlockSize { + fn from(vec: glam::UVec3) -> Self { + BlockSize::xyz(vec.x, vec.y, vec.z) + } +} +#[cfg(feature = "glam")] +impl From for BlockSize { + fn from(vec: glam::USizeVec2) -> Self { + BlockSize::xy(vec.x as u32, vec.y as u32) + } +} +#[cfg(feature = "glam")] +impl From for BlockSize { + fn from(vec: glam::USizeVec3) -> Self { + BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) + } +} + /// All supported function attributes for [Function::get_attribute](struct.Function.html#method.get_attribute) #[repr(u32)] #[non_exhaustive] diff --git a/crates/cust_core/Cargo.toml b/crates/cust_core/Cargo.toml index 5f24445b..c4b9d2c7 100644 --- a/crates/cust_core/Cargo.toml +++ b/crates/cust_core/Cargo.toml @@ -9,7 +9,7 @@ readme = "../../README.md" [dependencies] vek = { version = "0.17.1", default-features=false, features=["libm"], optional = true } -glam = { version = "0.29.2", features=["cuda", "libm"], default-features=false, optional=true } +glam = { version = "0.30", features=["cuda", "libm"], default-features=false, optional=true } mint = { version = "^0.5", optional = true } half = { version = "2.4.1", optional = true } num-complex = { version = "0.4.6", optional = true } diff --git a/crates/cust_core/src/lib.rs b/crates/cust_core/src/lib.rs index 463c0fba..40d4819e 100644 --- a/crates/cust_core/src/lib.rs +++ b/crates/cust_core/src/lib.rs @@ -1,5 +1,3 @@ -#![no_std] - pub use _hidden::*; pub use cust_derive::DeviceCopyCore as DeviceCopy; @@ -143,6 +141,7 @@ pub mod _hidden { { } + #[allow(unused_macros)] macro_rules! impl_device_copy_generic { ($($($strukt:ident)::+),* $(,)?) => { $( @@ -151,6 +150,7 @@ pub mod _hidden { } } + #[allow(unused_macros)] macro_rules! impl_device_copy { ($($strukt:ty),* $(,)?) => { $( @@ -172,7 +172,22 @@ pub mod _hidden { #[cfg(feature = "glam")] impl_device_copy! { - glam::Vec2, glam::Vec3, glam::Vec4, glam::IVec2, glam::IVec3, glam::IVec4, + glam::BVec2, glam::BVec3, glam::BVec3A, glam::BVec4, glam::BVec4A, + glam::U8Vec2, glam::U8Vec3, glam::U8Vec4, + glam::I8Vec2, glam::I8Vec3, glam::I8Vec4, + glam::U16Vec2, glam::U16Vec3, glam::U16Vec4, + glam::I16Vec2, glam::I16Vec3, glam::I16Vec4, + glam::UVec2, glam::UVec3, glam::UVec4, + glam::IVec2, glam::IVec3, glam::IVec4, + glam::U64Vec2, glam::U64Vec3, glam::U64Vec4, + glam::I64Vec2, glam::I64Vec3, glam::I64Vec4, + glam::USizeVec2, glam::USizeVec3, glam::USizeVec4, + glam::Vec2, glam::Vec3, glam::Vec3A, glam::Vec4, + glam::DVec2, glam::DVec3, glam::DVec4, + glam::Mat2, glam::Mat3, glam::Mat3A, glam::Mat4, + glam::DMat2, glam::DMat3, glam::DMat4, + glam::Quat, glam::DQuat, + glam::Affine2, glam::Affine3A, } #[cfg(feature = "mint")] diff --git a/crates/gpu_rand/src/lib.rs b/crates/gpu_rand/src/lib.rs index 2758140d..9ddeccaf 100644 --- a/crates/gpu_rand/src/lib.rs +++ b/crates/gpu_rand/src/lib.rs @@ -12,7 +12,6 @@ #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![allow(clippy::unreadable_literal)] -#![cfg_attr(target_os = "cuda", no_std)] #![feature(doc_cfg)] pub mod xoroshiro; diff --git a/crates/optix/Cargo.toml b/crates/optix/Cargo.toml index 639c58f1..600fdaf8 100644 --- a/crates/optix/Cargo.toml +++ b/crates/optix/Cargo.toml @@ -12,7 +12,7 @@ cust = { version = "0.3", path = "../cust", features=["impl_mint"] } cust_raw = { path = "../cust_raw", features=["driver"] } cfg-if = "1.0.0" bitflags = "2.9.0" -glam = { version = "0.29", features=["cuda", "libm"], default-features=false, optional=true } +glam = { version = "0.30", features=["cuda", "libm"], default-features=false, optional=true } half = { version = "2.4.1", optional = true } memoffset = "0.9.1" mint = "0.5.9" diff --git a/crates/optix/examples/ex04_mesh/Cargo.toml b/crates/optix/examples/ex04_mesh/Cargo.toml index a660b198..f044c8a6 100644 --- a/crates/optix/examples/ex04_mesh/Cargo.toml +++ b/crates/optix/examples/ex04_mesh/Cargo.toml @@ -12,7 +12,7 @@ anyhow = "1.0.44" glfw = "0.42.0" gl = "0.14.0" num-traits = "0.2.14" -glam = { version = "0.29.2", features=["cuda"] } +glam = { version = "0.30", features=["cuda"] } [build-dependencies] cuda_builder = { version = "0.3", path = "../../../cuda_builder" } diff --git a/crates/optix_device/Cargo.toml b/crates/optix_device/Cargo.toml index f1f6575f..2c3c466c 100644 --- a/crates/optix_device/Cargo.toml +++ b/crates/optix_device/Cargo.toml @@ -13,6 +13,3 @@ cuda_std = { version = "0.2", path = "../cuda_std" } paste = "1.0.15" seq-macro = "0.3.5" cust_core = { version = "0.1", path = "../cust_core" } - -[target.'cfg(not(target_os = "cuda"))'.dependencies] -glam = { version = "0.29", features = ["cuda"], default-features = false } diff --git a/crates/optix_device/src/lib.rs b/crates/optix_device/src/lib.rs index c13f28ad..811330a3 100644 --- a/crates/optix_device/src/lib.rs +++ b/crates/optix_device/src/lib.rs @@ -14,7 +14,8 @@ pub mod trace; pub mod transform; pub mod util; -use cuda_std::{glam, *}; +pub use cuda_std::glam; +use cuda_std::*; use glam::UVec3; pub use misc::*; diff --git a/crates/optix_device/src/trace.rs b/crates/optix_device/src/trace.rs index 6449c642..213b4a39 100644 --- a/crates/optix_device/src/trace.rs +++ b/crates/optix_device/src/trace.rs @@ -1,6 +1,6 @@ use crate::sys::*; +use cuda_std::glam::Vec3; use cust_core::DeviceCopy; -use glam::Vec3; use paste::paste; use seq_macro::seq; diff --git a/crates/optix_device/src/transform.rs b/crates/optix_device/src/transform.rs index 18bcc3e8..fe456316 100644 --- a/crates/optix_device/src/transform.rs +++ b/crates/optix_device/src/transform.rs @@ -1,8 +1,8 @@ // use std::hint::unreachable_unchecked; #[cfg(target_os = "cuda")] use core::arch::asm; +use cuda_std::glam::{Vec3, Vec4}; use cuda_std::gpu_only; -use glam::{Vec3, Vec4}; use crate::{intersection::ray_time, trace::TraversableHandle}; diff --git a/crates/optix_device/src/util.rs b/crates/optix_device/src/util.rs index f56f6f5a..edc1cb26 100644 --- a/crates/optix_device/src/util.rs +++ b/crates/optix_device/src/util.rs @@ -1,5 +1,5 @@ use crate::{intersect::get_attribute, payload::*}; -use glam::Vec3; +use cuda_std::glam::Vec3; pub fn pack_pointer(ptr: *mut T) -> (u32, u32) { let x = ptr as u32; diff --git a/examples/cuda/path_tracer/Cargo.toml b/examples/cuda/path_tracer/Cargo.toml index fc9680b4..e8164f48 100644 --- a/examples/cuda/path_tracer/Cargo.toml +++ b/examples/cuda/path_tracer/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2018" [dependencies] -glam = { version = "0.30.1", features = ["bytemuck", "cuda"] } +glam = { version = "0.30", features = ["bytemuck", "cuda", "mint"] } bytemuck = { version = "1.21", features = ["derive"] } cust = { version = "0.3", path = "../../../crates/cust", features = [ "impl_glam", diff --git a/examples/cuda/path_tracer/kernels/Cargo.toml b/examples/cuda/path_tracer/kernels/Cargo.toml index 97a92bd2..d22b0ace 100644 --- a/examples/cuda/path_tracer/kernels/Cargo.toml +++ b/examples/cuda/path_tracer/kernels/Cargo.toml @@ -5,10 +5,12 @@ edition = "2018" [dependencies] cuda_std = { version = "0.2", path = "../../../../crates/cuda_std" } +glam = { version = "0.30", default-features = false, features = ["libm", "cuda"] } enum_dispatch = "0.3.13" gpu_rand = { version = "0.1", path = "../../../../crates/gpu_rand" } -cust_core = { path = "../../../../crates/cust_core", features=["vek"] } +cust_core = { path = "../../../../crates/cust_core", features=["glam"] } optix_device = { path = "../../../../crates/optix_device" } +approx = { version = "0.5" } [lib] crate-type = ["cdylib", "rlib"] diff --git a/examples/cuda/path_tracer/kernels/src/lib.rs b/examples/cuda/path_tracer/kernels/src/lib.rs index c3a0e578..ca1b3305 100644 --- a/examples/cuda/path_tracer/kernels/src/lib.rs +++ b/examples/cuda/path_tracer/kernels/src/lib.rs @@ -11,13 +11,12 @@ pub mod render_kernels; pub mod scene; pub mod sphere; -pub use cuda_std::glam; +pub use cuda_std::glam::{USizeVec2, Vec2, Vec3}; use cust_core::DeviceCopy; use enum_dispatch::enum_dispatch; use hittable::{HitRecord, Hittable}; use sphere::Sphere; -use glam::{USizeVec2, Vec2, Vec3}; pub type Point = Vec3; #[derive(Default, Clone, Copy, DeviceCopy)] diff --git a/examples/cuda/path_tracer/kernels/src/material.rs b/examples/cuda/path_tracer/kernels/src/material.rs index c5961f20..79ee31ff 100644 --- a/examples/cuda/path_tracer/kernels/src/material.rs +++ b/examples/cuda/path_tracer/kernels/src/material.rs @@ -1,4 +1,5 @@ use crate::{hittable::HitRecord, math::*, Ray, Vec3}; +use approx::{AbsDiffEq, RelativeEq}; use cust_core::DeviceCopy; use enum_dispatch::enum_dispatch; use gpu_rand::{DefaultRand, GpuRand}; @@ -25,8 +26,14 @@ pub struct DiffuseMaterial { impl Material for DiffuseMaterial { fn scatter(&self, _: Ray, hit: HitRecord, rng: &mut DefaultRand) -> (Vec3, Option) { let mut scatter_dir = hit.normal + random_in_unit_sphere(rng); - - if scatter_dir.is_approx_zero() { + let epsilon = f32::default_epsilon(); + let max_rel = f32::default_max_relative(); + let four_epsilon = epsilon + epsilon + epsilon + epsilon; + let four_max_rel = max_rel + max_rel + max_rel + max_rel; + if scatter_dir + .length_squared() + .relative_eq(&0., four_epsilon, four_max_rel) + { scatter_dir = hit.normal; } @@ -47,7 +54,7 @@ pub struct MetallicMaterial { impl Material for MetallicMaterial { fn scatter(&self, incoming: Ray, hit: HitRecord, rng: &mut DefaultRand) -> (Vec3, Option) { - let reflected = reflect(incoming.dir.normalized(), hit.normal); + let reflected = reflect(incoming.dir.normalize(), hit.normal); let scattered = Ray { origin: hit.point, dir: reflected + self.roughness * random_in_unit_sphere(rng), @@ -76,11 +83,11 @@ impl Material for DielectricMaterial { if incoming.dir.dot(hit.normal) > 0.0 { outward_norm = -hit.normal; ni_over_nt = self.ior; - cos = self.ior * incoming.dir.dot(hit.normal) / incoming.dir.magnitude(); + cos = self.ior * incoming.dir.dot(hit.normal) / incoming.dir.length(); } else { outward_norm = hit.normal; ni_over_nt = 1.0 / self.ior; - cos = -incoming.dir.dot(hit.normal) / incoming.dir.magnitude(); + cos = -incoming.dir.dot(hit.normal) / incoming.dir.length(); } if let Some(refracted) = refract(incoming.dir, outward_norm, ni_over_nt) { @@ -99,7 +106,7 @@ impl Material for DielectricMaterial { self.color, Some(Ray { origin: hit.point, - dir: reflect(incoming.dir.normalized(), hit.normal), + dir: reflect(incoming.dir.normalize(), hit.normal), }), ) } diff --git a/examples/cuda/path_tracer/kernels/src/math.rs b/examples/cuda/path_tracer/kernels/src/math.rs index 3581f9fc..1137b581 100644 --- a/examples/cuda/path_tracer/kernels/src/math.rs +++ b/examples/cuda/path_tracer/kernels/src/math.rs @@ -1,8 +1,8 @@ //! Generic math utilities. -use crate::Vec3; #[cfg(target_os = "cuda")] use cuda_std::GpuFloat; +use glam::Vec3; use gpu_rand::{DefaultRand, GpuRand}; /// Converts a float in the range of [0.0, 1.0] to a range of [-1.0, 1.0]. @@ -24,7 +24,7 @@ pub fn random_snorm_vec(state: &mut DefaultRand) -> Vec3 { pub fn random_in_unit_sphere(state: &mut DefaultRand) -> Vec3 { loop { let p = random_snorm_vec(state); - if p.magnitude_squared() >= 1.0 { + if p.length_squared() >= 1.0 { continue; } return p; @@ -36,7 +36,7 @@ pub fn reflect(v: Vec3, n: Vec3) -> Vec3 { } pub fn refract(v: Vec3, n: Vec3, ni_over_nt: f32) -> Option { - let uv = v.normalized(); + let uv = v.normalize(); let dt = uv.dot(n); let discriminant = 1.0 - ni_over_nt * ni_over_nt * (1.0 - dt * dt); if discriminant > 0.0 { diff --git a/examples/cuda/path_tracer/kernels/src/optix.rs b/examples/cuda/path_tracer/kernels/src/optix.rs index 7a0994f0..b25501e3 100644 --- a/examples/cuda/path_tracer/kernels/src/optix.rs +++ b/examples/cuda/path_tracer/kernels/src/optix.rs @@ -13,7 +13,7 @@ use cust_core::DeviceCopy; use gpu_rand::{DefaultRand, GpuRand}; use optix_device::{ closesthit, get_launch_index, - glam::Vec3Swizzles, + glam::{UVec2, Vec3Swizzles}, intersection, payload, raygen, sbt_data, trace::{RayFlags, TraversableHandle}, util::{get_vec3_attributes, pack_pointer, unpack_pointer}, @@ -31,8 +31,8 @@ extern "C" { #[derive(Clone, Copy)] pub struct LaunchParams<'a> { - pub image_buf: *mut Vec3, - pub size: Vec2, + pub image_buf: *mut Vec3, + pub size: UVec2, pub scene: Scene<'a>, pub viewport: Viewport, pub rand_states: *mut DefaultRand, @@ -69,8 +69,8 @@ pub unsafe fn __intersection__sphere() { if let Some(hit) = sphere.hit(ray, tmin, tmax) { // you could also recompute these values in the closesthit pretty easily. But optix provides us // 7 32-bit attribute regs which are perfect for passing these values. - let n = hit.normal.map(|x| x.to_bits()); - let p = hit.point.map(|x| x.to_bits()); + let n = hit.normal.to_array().map(|x| x.to_bits()); + let p = hit.point.to_array().map(|x| x.to_bits()); let mat = hit.material_handle as u32; intersection::report_intersection(hit.t, 0, [n[0], n[1], n[2], p[0], p[1], p[2], mat]); } @@ -111,10 +111,10 @@ pub unsafe fn __raygen__render() { let rng = PARAMS.rand_states.add(idx as usize); let offset = (*rng).normal_f32_2(); - let mut cur_ray = generate_ray(Vec2::from(i.to_array()), &PARAMS.viewport, offset.into()); + let mut cur_ray = generate_ray(i, &PARAMS.viewport, offset.into()); - let mut attenuation = Vec3::one(); - let mut color = Vec3::zero(); + let mut attenuation = Vec3::ONE; + let mut color = Vec3::ZERO; for _ in 0..MAX_BOUNCES { let mut prd = PerRayData { @@ -128,8 +128,8 @@ pub unsafe fn __raygen__render() { raygen::trace( PARAMS.handle, - (cur_ray.origin.into_array()).into(), - (cur_ray.dir.into_array()).into(), + cur_ray.origin, + cur_ray.dir, 0.001, 1e20, 0.0, diff --git a/examples/cuda/path_tracer/kernels/src/render.rs b/examples/cuda/path_tracer/kernels/src/render.rs index c4fa7303..86cd8cc6 100644 --- a/examples/cuda/path_tracer/kernels/src/render.rs +++ b/examples/cuda/path_tracer/kernels/src/render.rs @@ -1,15 +1,16 @@ use crate::*; +use cuda_std::glam::UVec2; const BACKGROUND_BLUE_MULTIPLIER: f32 = 0.7; pub fn color(ray: Ray) -> Vec3 { - let unit = ray.dir.normalized(); + let unit = ray.dir.normalize(); let t = BACKGROUND_BLUE_MULTIPLIER * (unit.y + 1.0); - (1.0 - t) * Vec3::one() + t * Vec3::new(0.5, 0.7, 1.0) + (1.0 - t) * Vec3::ONE + t * Vec3::new(0.5, 0.7, 1.0) } pub fn generate_ray(idx: UVec2, view: &Viewport, offset: Vec2) -> Ray { - let uv = (idx.numcast::().unwrap() + offset) / view.bounds.numcast().unwrap(); + let uv = (idx.as_vec2() + offset) / view.bounds.as_vec2(); Ray { origin: view.origin, dir: view.lower_left + uv.x * view.horizontal + uv.y * view.vertical - view.origin, diff --git a/examples/cuda/path_tracer/kernels/src/render_kernels.rs b/examples/cuda/path_tracer/kernels/src/render_kernels.rs index a036a884..8440ce7c 100644 --- a/examples/cuda/path_tracer/kernels/src/render_kernels.rs +++ b/examples/cuda/path_tracer/kernels/src/render_kernels.rs @@ -48,10 +48,9 @@ pub unsafe fn postprocess(fb: *const Vec3, out: *mut U8Vec3, view: Viewport) { let original = &*fb.add(idx); let out = &mut *out.add(idx); // gamma=2.0 - let gamma_corrected = original.sqrt(); + let gamma_corrected = original.map(f32::sqrt); *out = (gamma_corrected * 255.0) - .clamp(Vec3::zero(), Vec3::broadcast(255.0)) - .numcast() - .unwrap(); + .clamp(Vec3::ZERO, Vec3::splat(255.0)) + .as_u8vec3(); } diff --git a/examples/cuda/path_tracer/kernels/src/scene.rs b/examples/cuda/path_tracer/kernels/src/scene.rs index 8c17232f..511c85a2 100644 --- a/examples/cuda/path_tracer/kernels/src/scene.rs +++ b/examples/cuda/path_tracer/kernels/src/scene.rs @@ -45,7 +45,7 @@ impl Scene<'_> { pub fn ray_color(&self, ray: Ray, rng: &mut DefaultRand) -> Vec3 { let mut cur_ray = ray; - let mut attenuation = Vec3::one(); + let mut attenuation = Vec3::ONE; for _ in 0..MAX_BOUNCES { if let Some(hit) = self.hit(cur_ray, 0.001, f32::INFINITY) { @@ -55,12 +55,12 @@ impl Scene<'_> { attenuation *= hit_attenuation; cur_ray = scattered; } else { - return Vec3::zero(); + return Vec3::ZERO; } } else { return attenuation * render::color(cur_ray); } } - Vec3::zero() + Vec3::ZERO } } diff --git a/examples/cuda/path_tracer/src/common.rs b/examples/cuda/path_tracer/src/common.rs index 36ab3d0e..8e96d2cd 100644 --- a/examples/cuda/path_tracer/src/common.rs +++ b/examples/cuda/path_tracer/src/common.rs @@ -1,4 +1,4 @@ -use glam::{Vec2, Vec3}; +use glam::{DVec2, USizeVec2, Vec2, Vec3}; use glium::glutin::event::{ ElementState, Event, MouseButton, MouseScrollDelta, VirtualKeyCode, WindowEvent, }; @@ -20,8 +20,8 @@ impl Camera { let viewport_height = 2.0 * h; let viewport_width = self.aspect_ratio * viewport_height; - let w = (self.origin - self.lookat).normalized(); - let u = self.vup.cross(w).normalized(); + let w = (self.origin - self.lookat).normalize(); + let u = self.vup.cross(w).normalize(); let v = w.cross(u); viewport.origin = self.origin; @@ -34,7 +34,7 @@ impl Camera { #[derive(Debug, Clone, Copy, PartialEq)] pub struct CameraController { pub sensitivity: f32, - last_mouse_pos: Vec2, + last_mouse_pos: Vec2, yaw: f32, pitch: f32, mousewheel_pressed: bool, @@ -46,7 +46,7 @@ impl CameraController { pub fn new(dimensions: USizeVec2) -> Self { CameraController { sensitivity: 0.1, - last_mouse_pos: dimensions.numcast().unwrap() / 2.0, + last_mouse_pos: dimensions.as_vec2() / 2.0, yaw: -90.0, pitch: 0.0, mousewheel_pressed: false, @@ -59,7 +59,7 @@ impl CameraController { match event { Event::WindowEvent { event, .. } => match event { WindowEvent::CursorMoved { position, .. } => { - let mouse_pos = Vec2::new(position.x, position.y).numcast().unwrap(); + let mouse_pos = DVec2::new(position.x, position.y).as_vec2(); let delta = mouse_pos - self.last_mouse_pos; self.last_mouse_pos = mouse_pos; @@ -74,8 +74,8 @@ impl CameraController { if self.shift_pressed { let change = Vec2::new(-delta.x, delta.y) * self.sensitivity * 0.05; - camera.lookat += change; - camera.origin += change; + camera.lookat += change.extend(0.0); + camera.origin += change.extend(0.0); return true; } diff --git a/examples/cuda/path_tracer/src/cpu/mod.rs b/examples/cuda/path_tracer/src/cpu/mod.rs index 3a6117e5..1dc1a13d 100644 --- a/examples/cuda/path_tracer/src/cpu/mod.rs +++ b/examples/cuda/path_tracer/src/cpu/mod.rs @@ -1,6 +1,6 @@ use std::time::Duration; -use glam::{Clamp, Vec2, Vec3}; +use glam::{U8Vec3, USizeVec2, UVec2, Vec2, Vec3}; use gpu_rand::{DefaultRand, GpuRand}; use imgui::Ui; use path_tracer_kernels::{ @@ -14,7 +14,7 @@ use crate::{common::Camera, cuda::SEED}; pub struct CpuRenderer { // this is basically the cuda buffers but not gpu buffers. accumulated_buffer: Vec, - out_buffer: Vec>, + out_buffer: Vec, viewport: Viewport, objects: Vec, @@ -24,10 +24,10 @@ pub struct CpuRenderer { impl CpuRenderer { pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Self { - let accumulated_buffer = vec![Vec3::zero(); dimensions.product()]; - let out_buffer = vec![Vec3::zero(); dimensions.product()]; + let accumulated_buffer = vec![Vec3::ZERO; dimensions.element_product()]; + let out_buffer = vec![U8Vec3::ZERO; dimensions.element_product()]; - let rand_states = DefaultRand::initialize_states(SEED, dimensions.product()); + let rand_states = DefaultRand::initialize_states(SEED, dimensions.element_product()); let mut viewport = Viewport::default(); camera.as_viewport(&mut viewport); @@ -63,14 +63,15 @@ impl CpuRenderer { } pub fn update_camera(&mut self, new_camera: &Camera) { - self.accumulated_buffer.fill(Vec3::zero()); + self.accumulated_buffer.fill(Vec3::ZERO); new_camera.as_viewport(&mut self.viewport); } pub fn resize(&mut self, dimensions: USizeVec2) { self.accumulated_buffer - .resize(dimensions.product(), Vec3::zero()); - self.out_buffer.resize(dimensions.product(), Vec3::zero()); + .resize(dimensions.element_product(), Vec3::ZERO); + self.out_buffer + .resize(dimensions.element_product(), U8Vec3::ZERO); self.viewport.bounds = dimensions; } @@ -84,7 +85,7 @@ impl CpuRenderer { self.objects[idx] = new; } - pub fn final_image(&mut self, cur_sample: usize) -> (&[Vec3], Duration) { + pub fn final_image(&mut self, cur_sample: usize) -> (&[U8Vec3], Duration) { let start = std::time::Instant::now(); let Self { @@ -98,12 +99,11 @@ impl CpuRenderer { .zip(accumulated_buffer.par_iter()) .for_each(|(px, acc)| { let scaled = acc / cur_sample as f32; - let gamma_corrected = scaled.sqrt(); + let gamma_corrected = scaled.map(f32::sqrt); *px = (gamma_corrected * 255.0) - .clamped(Vec3::zero(), Vec3::broadcast(255.0)) - .numcast() - .unwrap(); + .clamp(Vec3::ZERO, Vec3::splat(255.0)) + .as_u8vec3(); }); (&self.out_buffer, start.elapsed()) @@ -131,7 +131,7 @@ impl CpuRenderer { .for_each(|(idx, (px, rng))| { let x = idx % viewport.bounds.x; let y = idx / viewport.bounds.x; - let idx = Vec2::new(x as u32, y as u32); + let idx = UVec2::new(x as u32, y as u32); let offset = Vec2::from(rng.normal_f32_2()); diff --git a/examples/cuda/path_tracer/src/cuda/data.rs b/examples/cuda/path_tracer/src/cuda/data.rs index 8b8eb5c3..86707b73 100644 --- a/examples/cuda/path_tracer/src/cuda/data.rs +++ b/examples/cuda/path_tracer/src/cuda/data.rs @@ -5,7 +5,7 @@ use cust::{ memory::{DeviceBuffer, DeviceCopy, UnifiedBuffer}, util::SliceExt, }; -use glam::{Vec2, Vec3}; +use glam::{U8Vec3, USizeVec2, Vec3}; use gpu_rand::DefaultRand; use path_tracer_kernels::{material::MaterialKind, scene::Scene, Object, Viewport}; @@ -16,13 +16,13 @@ use super::SEED; /// You could put these in the CUDA renderer but we separate them out for code readability. pub struct CudaRendererBuffers { /// The buffer of accumulated colors, every sample/render call adds its color to this buffer. - pub accumulated_buffer: DeviceBuffer>, + pub accumulated_buffer: DeviceBuffer, /// The scaled buffer of colors, this is just the accumulated colors divided by sample count. - pub scaled_buffer: DeviceBuffer>, + pub scaled_buffer: DeviceBuffer, /// The final image buffer after denoising and postprocessing. - pub out_buffer: DeviceBuffer>, + pub out_buffer: DeviceBuffer, /// The scaled buffer but denoised. In the future we will use the same buffer for this. - pub denoised_buffer: DeviceBuffer>, + pub denoised_buffer: DeviceBuffer, /// The viewport used by the render kernel to emit rays. pub viewport: Viewport, @@ -35,7 +35,7 @@ pub struct CudaRendererBuffers { } impl CudaRendererBuffers { - pub fn new(dimensions: Vec2, camera: &Camera, scene: &Scene) -> CudaResult { + pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> CudaResult { let accumulated_buffer = Self::image_buffer(dimensions)?; let out_buffer = Self::image_buffer(dimensions)?; let denoised_buffer = Self::image_buffer(dimensions)?; @@ -48,7 +48,7 @@ impl CudaRendererBuffers { camera.as_viewport(&mut viewport); viewport.bounds = dimensions; - let rand_states = DefaultRand::initialize_states(SEED, dimensions.product()) + let rand_states = DefaultRand::initialize_states(SEED, dimensions.element_product()) .as_slice() .as_unified_buf()?; @@ -82,13 +82,13 @@ impl CudaRendererBuffers { } /// Resize the image-specific buffers for a new image size. - pub fn resize(&mut self, new: Vec2) -> CudaResult<()> { + pub fn resize(&mut self, new: USizeVec2) -> CudaResult<()> { self.viewport.bounds = new; self.accumulated_buffer = Self::image_buffer(new)?; self.out_buffer = Self::image_buffer(new)?; self.denoised_buffer = Self::image_buffer(new)?; self.scaled_buffer = Self::image_buffer(new)?; - self.rand_states = DefaultRand::initialize_states(SEED, new.product()) + self.rand_states = DefaultRand::initialize_states(SEED, new.element_product()) .as_slice() .as_unified_buf()?; Ok(()) @@ -106,8 +106,8 @@ impl CudaRendererBuffers { // could also use the convenience method on optix::denoiser::Image for this fn image_buffer( - dimensions: Vec2, - ) -> CudaResult>> { - DeviceBuffer::zeroed(dimensions.product()) + dimensions: USizeVec2, + ) -> CudaResult> { + DeviceBuffer::zeroed(dimensions.element_product()) } } diff --git a/examples/cuda/path_tracer/src/cuda/mod.rs b/examples/cuda/path_tracer/src/cuda/mod.rs index f737e6e8..f8c68d9b 100644 --- a/examples/cuda/path_tracer/src/cuda/mod.rs +++ b/examples/cuda/path_tracer/src/cuda/mod.rs @@ -14,13 +14,12 @@ use cust::{ memory::DeviceBox, prelude::*, }; +use glam::{U8Vec3, USizeVec2}; use optix::{ context::DeviceContext, denoiser::{Denoiser, DenoiserModelKind, Image, ImageFormat}, }; use path_tracer_kernels::scene::Scene; -use vek::{Vec2, Vec3}; - /// Seed for the random states pub const SEED: u64 = 932174513921034; @@ -39,12 +38,12 @@ pub struct CudaRenderer { _context: Context, buffers: CudaRendererBuffers, - cpu_image: Vec>, + cpu_image: Vec, optix_renderer: OptixRenderer, } impl CudaRenderer { - pub fn new(dimensions: Vec2, camera: &Camera, scene: &Scene) -> Result { + pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Result { let context = cust::quick_init()?; optix::init().unwrap(); @@ -60,7 +59,7 @@ impl CudaRenderer { .unwrap(); let buffers = CudaRendererBuffers::new(dimensions, camera, scene)?; - let cpu_image = vec![Vec3::zero(); dimensions.product()]; + let cpu_image = vec![U8Vec3::ZERO; dimensions.element_product()]; let optix_renderer = OptixRenderer::new(&mut optix_context, &stream, scene)?; @@ -93,9 +92,10 @@ impl CudaRenderer { } /// Resize the image-specific data for a new size - pub fn resize(&mut self, new_size: Vec2) -> CudaResult<()> { + pub fn resize(&mut self, new_size: USizeVec2) -> CudaResult<()> { self.buffers.resize(new_size)?; - self.cpu_image.resize(new_size.product(), Vec3::zero()); + self.cpu_image + .resize(new_size.element_product(), U8Vec3::ZERO); self.denoiser .setup_state(&self.stream, new_size.x as u32, new_size.y as u32, false) @@ -105,7 +105,7 @@ impl CudaRenderer { /// calculate an optimal launch configuration for an image kernel fn launch_dimensions(&self) -> (GridSize, BlockSize) { - let threads = Vec2::broadcast(THREAD_BLOCK_AXIS_LENGTH); + let threads = USizeVec2::splat(THREAD_BLOCK_AXIS_LENGTH); let blocks = (self.buffers.viewport.bounds / threads) + 1; (blocks.into(), threads.into()) } @@ -118,7 +118,7 @@ impl CudaRenderer { &mut self, cur_sample: usize, denoise: bool, - ) -> CudaResult<(&[Vec3], Duration, Duration)> { + ) -> CudaResult<(&[U8Vec3], Duration, Duration)> { let module = &self.module; let stream = &self.stream; diff --git a/examples/cuda/path_tracer/src/main.rs b/examples/cuda/path_tracer/src/main.rs index 5d847774..65dad0b1 100644 --- a/examples/cuda/path_tracer/src/main.rs +++ b/examples/cuda/path_tracer/src/main.rs @@ -22,7 +22,7 @@ fn main() -> Result<(), Box> { let camera = Camera { origin: Vec3::new(0.0, 0.5, 2.0), lookat: Vec3::new(0.0, 0.0, -0.5), - vup: Vec3::unit_y(), + vup: Vec3::Y, fov: 70.0, aspect_ratio: (WIDTH as f32) / (HEIGHT as f32), }; diff --git a/examples/cuda/path_tracer/src/optix/mod.rs b/examples/cuda/path_tracer/src/optix/mod.rs index 57191234..77694ec4 100644 --- a/examples/cuda/path_tracer/src/optix/mod.rs +++ b/examples/cuda/path_tracer/src/optix/mod.rs @@ -179,7 +179,7 @@ impl OptixRenderer { } pub fn render(&mut self, stream: &Stream, buffers: &mut CudaRendererBuffers) -> Result<()> { - let dims = buffers.viewport.bounds.numcast().unwrap(); + let dims = buffers.viewport.bounds.as_uvec2(); let launch_params = LaunchParams { image_buf: buffers.accumulated_buffer.as_device_ptr().as_mut_ptr(), diff --git a/examples/cuda/path_tracer/src/renderer.rs b/examples/cuda/path_tracer/src/renderer.rs index 9bda19d0..0558b37e 100644 --- a/examples/cuda/path_tracer/src/renderer.rs +++ b/examples/cuda/path_tracer/src/renderer.rs @@ -1,8 +1,8 @@ +use glam::USizeVec2; use glium::glutin::{event::Event, event_loop::ControlFlow}; use imgui::Ui; use path_tracer_kernels::scene::Scene; use sysinfo::System; -use vek::Vec2; use crate::{ common::{Camera, CameraController}, @@ -23,7 +23,7 @@ pub struct Renderer { } impl Renderer { - pub fn new(dimensions: Vec2, camera: &Camera, scene: &Scene) -> Self { + pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Self { Self { cuda: CudaRenderer::new(dimensions, camera, scene) .expect("Failed to make CUDA renderer"), @@ -38,7 +38,7 @@ impl Renderer { } } - pub fn resize(&mut self, new: Vec2) { + pub fn resize(&mut self, new: USizeVec2) { self.accumulated_samples = 0; self.cpu.resize(new); self.cuda diff --git a/examples/cuda/path_tracer/src/viewer.rs b/examples/cuda/path_tracer/src/viewer.rs index 3237f210..4eef12e6 100644 --- a/examples/cuda/path_tracer/src/viewer.rs +++ b/examples/cuda/path_tracer/src/viewer.rs @@ -1,3 +1,4 @@ +use glam::USizeVec2; use glium::{ glutin::{ dpi::PhysicalSize, @@ -16,7 +17,6 @@ use imgui::Condition; use imgui_winit_support::{HiDpiMode, WinitPlatform}; use path_tracer_kernels::scene::Scene; use std::time::Instant; -use vek::Vec2; use crate::{common::Camera, renderer::Renderer, HEIGHT, WIDTH}; @@ -59,7 +59,11 @@ pub fn run(camera: &Camera, scene: &Scene) -> ! { .with_inner_size(PhysicalSize::new(WIDTH as f64, HEIGHT as f64)); let cb = ContextBuilder::new().with_vsync(true); let display = Display::new(wb, cb, &event_loop).unwrap(); - let renderer = Renderer::new(Vec2::new(WIDTH as usize, HEIGHT as usize), camera, scene); + let renderer = Renderer::new( + USizeVec2::new(WIDTH as usize, HEIGHT as usize), + camera, + scene, + ); let mut viewer = ViewerRenderer::new(display, renderer); let mut last_frame = Instant::now(); @@ -72,7 +76,7 @@ pub fn run(camera: &Camera, scene: &Scene) -> ! { struct ViewerRenderer { vertex_buffer: VertexBuffer, image_program: Program, - image_size: Vec2, + image_size: USizeVec2, renderer: Renderer, imgui_ctx: imgui::Context, texture: SrgbTexture2d, @@ -87,7 +91,7 @@ impl ViewerRenderer { let image_program = Program::from_source(&display, IMAGE_VERT, IMAGE_FRAG, None).unwrap(); let size = display.gl_window().window().inner_size(); - let image_size = Vec2::new(size.width as usize, size.height as usize); + let image_size = USizeVec2::new(size.width as usize, size.height as usize); let texture = SrgbTexture2d::empty(&display, image_size.x as u32, image_size.y as u32).unwrap(); @@ -152,7 +156,7 @@ impl ViewerRenderer { *control_flow = ControlFlow::Exit; } WindowEvent::Resized(new) => { - let image_size = Vec2::new(new.width as usize, new.height as usize); + let image_size = USizeVec2::new(new.width as usize, new.height as usize); self.image_size = image_size; self.texture = SrgbTexture2d::empty( &self.display, From cee76521381ac369227cb8afab9d212d514a0d1c Mon Sep 17 00:00:00 2001 From: Christian Legnitto Date: Sat, 12 Jul 2025 22:36:35 +0200 Subject: [PATCH 3/3] Move examples to glam --- crates/optix/examples/ex03_window/Cargo.toml | 1 + .../optix/examples/ex03_window/src/gl_util.rs | 4 +- crates/optix/examples/ex03_window/src/main.rs | 7 +- .../examples/ex03_window/src/renderer.rs | 22 +- .../optix/examples/ex03_window/src/vector.rs | 301 ------------------ crates/optix/examples/ex04_mesh/Cargo.toml | 2 +- crates/optix/examples/ex04_mesh/src/vector.rs | 301 ------------------ examples/optix/denoiser/Cargo.toml | 4 +- examples/optix/denoiser/src/main.rs | 21 +- 9 files changed, 29 insertions(+), 634 deletions(-) delete mode 100644 crates/optix/examples/ex03_window/src/vector.rs delete mode 100644 crates/optix/examples/ex04_mesh/src/vector.rs diff --git a/crates/optix/examples/ex03_window/Cargo.toml b/crates/optix/examples/ex03_window/Cargo.toml index 089ef9e7..f93f2a21 100644 --- a/crates/optix/examples/ex03_window/Cargo.toml +++ b/crates/optix/examples/ex03_window/Cargo.toml @@ -13,3 +13,4 @@ anyhow = "1.0.44" glfw = "0.42.0" gl = "0.14.0" num-traits = "0.2.14" +glam = { version = "0.30", features = ["bytemuck"] } diff --git a/crates/optix/examples/ex03_window/src/gl_util.rs b/crates/optix/examples/ex03_window/src/gl_util.rs index 9cbc48c9..2199181a 100644 --- a/crates/optix/examples/ex03_window/src/gl_util.rs +++ b/crates/optix/examples/ex03_window/src/gl_util.rs @@ -2,7 +2,7 @@ use gl; use gl::types::{GLchar, GLenum, GLint, GLsizeiptr, GLuint, GLvoid}; use std::ffi::{CStr, CString}; -use crate::vector::*; +use glam::Vec4; pub struct Shader { id: GLuint, @@ -516,7 +516,7 @@ impl FullscreenQuad { self.vertex_array.unbind(); } - pub fn update_texture(&self, data: &[V4f32]) { + pub fn update_texture(&self, data: &[Vec4]) { unsafe { gl::BindTexture(gl::TEXTURE_2D, self.texture_id); gl::TexSubImage2D( diff --git a/crates/optix/examples/ex03_window/src/main.rs b/crates/optix/examples/ex03_window/src/main.rs index 08bc998e..972eb939 100644 --- a/crates/optix/examples/ex03_window/src/main.rs +++ b/crates/optix/examples/ex03_window/src/main.rs @@ -3,8 +3,7 @@ mod renderer; use renderer::Renderer; -mod vector; -pub use vector::*; +use glam::{IVec2, Vec4}; mod gl_util; use gl_util::FullscreenQuad; use glfw::{Action, Context, Key}; @@ -42,7 +41,7 @@ fn main() -> Result<(), Box> { let mut fsq = FullscreenQuad::new(width, height).unwrap(); - let mut image_data = vec![v4f32(0.0, 0.0, 0.0, 0.0); (width * height) as usize]; + let mut image_data = vec![Vec4::new(0.0, 0.0, 0.0, 0.0); (width * height) as usize]; unsafe { gl::Viewport(0, 0, fb_width, fb_height); @@ -62,7 +61,7 @@ fn main() -> Result<(), Box> { renderer.resize(w, h)?; width = w; height = h; - image_data.resize((width * height) as usize, v4f32(0.0, 0.0, 0.0, 0.0)); + image_data.resize((width * height) as usize, Vec4::new(0.0, 0.0, 0.0, 0.0)); } renderer.render()?; diff --git a/crates/optix/examples/ex03_window/src/renderer.rs b/crates/optix/examples/ex03_window/src/renderer.rs index 03acd884..7bcfab71 100644 --- a/crates/optix/examples/ex03_window/src/renderer.rs +++ b/crates/optix/examples/ex03_window/src/renderer.rs @@ -14,7 +14,7 @@ use optix::{ shader_binding_table::{SbtRecord, ShaderBindingTable}, }; -use crate::vector::V4f32; +use glam::{IVec2, Vec4}; pub struct Renderer { launch_params: DeviceVariable, @@ -23,7 +23,7 @@ pub struct Renderer { buf_hitgroup: DeviceBuffer, buf_miss: DeviceBuffer, pipeline: Pipeline, - color_buffer: DeviceBuffer, + color_buffer: DeviceBuffer, ctx: DeviceContext, stream: Stream, cuda_context: CuContext, @@ -144,10 +144,7 @@ impl Renderer { let launch_params = DeviceVariable::new(LaunchParams { frame_id: 0, color_buffer: color_buffer.as_device_ptr(), - fb_size: Point2i { - x: width as i32, - y: height as i32, - }, + fb_size: IVec2::new(width as i32, height as i32), })?; Ok(Renderer { @@ -193,24 +190,17 @@ impl Renderer { Ok(()) } - pub fn download_pixels(&self, slice: &mut [V4f32]) -> Result<(), Box> { + pub fn download_pixels(&self, slice: &mut [Vec4]) -> Result<(), Box> { self.color_buffer.copy_to(slice)?; Ok(()) } } -#[repr(C)] -#[derive(Copy, Clone, DeviceCopy)] -struct Point2i { - pub x: i32, - pub y: i32, -} - #[repr(C)] #[derive(Copy, Clone, DeviceCopy)] struct LaunchParams { - pub color_buffer: DevicePointer, - pub fb_size: Point2i, + pub color_buffer: DevicePointer, + pub fb_size: IVec2, pub frame_id: i32, } diff --git a/crates/optix/examples/ex03_window/src/vector.rs b/crates/optix/examples/ex03_window/src/vector.rs deleted file mode 100644 index 589a4134..00000000 --- a/crates/optix/examples/ex03_window/src/vector.rs +++ /dev/null @@ -1,301 +0,0 @@ -use core::ops; -pub use num_traits::{One, Zero}; - -pub trait Scalar: num_traits::One + num_traits::Zero {} - -impl Scalar for i8 {} -impl Scalar for i16 {} -impl Scalar for i32 {} -impl Scalar for i64 {} -impl Scalar for f32 {} -impl Scalar for f64 {} - -pub trait Vector { - type Component: Scalar; - - fn dot(&self, v: &Self) -> Self::Component; - - #[inline] - fn length2(&self) -> Self::Component { - self.dot(&self) - } -} - -macro_rules! vec_impl { - ($name:ident: $t:ty, $sc:ident, $align:expr, ($($c:ident),+)) => { - #[repr(C)] - #[derive(Clone, Copy, Default, PartialEq, Debug)] - pub struct $name - { - $( - pub $c: $t, - )+ - } - - impl $name - { - pub fn new($($c: $t),+) -> Self - { - Self { - $( - $c, - )+ - } - } - } - - impl Vector for $name - { - type Component = $t; - - #[inline] - fn dot(&self, v: &Self) -> $t - { - <$t>::zero() $( - + self.$c * v.$c - )+ - } - } - - impl From<$t> for $name - { - fn from(x: $t) -> Self - { - Self { - $( - $c: x, - )+ - } - } - } - - impl ops::Neg for $name - { - type Output = Self; - - fn neg(self) -> Self - { - Self { - $( - $c: -self.$c, - )+ - } - } - } - - impl ops::Add for $name - { - type Output = Self; - - #[inline] - fn add(self, v: Self) -> Self - { - Self { - $( - $c: self.$c + v.$c, - )+ - } - } - } - - impl ops::AddAssign for $name - { - #[inline] - fn add_assign(&mut self, v: Self) - { - $( - self.$c += v.$c; - )+ - } - } - - impl ops::Sub for $name - { - type Output = Self; - - #[inline] - fn sub(self, v: Self) -> Self - { - Self { - $( - $c: self.$c - v.$c, - )+ - } - } - } - - impl ops::SubAssign for $name - { - #[inline] - fn sub_assign(&mut self, v: Self) - { - $( - self.$c -= v.$c; - )+ - } - } - - impl ops::Mul for $name - { - type Output = Self; - - #[inline] - fn mul(self, v: Self) -> Self - { - Self { - $( - $c: self.$c * v.$c, - )+ - } - } - } - - impl ops::MulAssign for $name - { - #[inline] - fn mul_assign(&mut self, v: Self) - { - $( - self.$c *= v.$c; - )+ - } - } - - impl ops::Mul<$t> for $name - { - type Output = Self; - - #[inline] - fn mul(self, v: $t) -> Self - { - Self { - $( - $c: self.$c * v, - )+ - } - } - } - - impl ops::MulAssign<$t> for $name - { - #[inline] - fn mul_assign(&mut self, v: $t) - { - $( - self.$c *= v; - )+ - } - } - - impl ops::Div<$t> for $name - { - type Output = Self; - - #[inline] - fn div(self, v: $t) -> Self - { - Self { - $( - $c: self.$c / v, - )+ - } - } - } - - impl ops::DivAssign<$t> for $name - { - #[inline] - fn div_assign(&mut self, v: $t) - { - $( - self.$c /= v; - )+ - } - } - - impl ops::Mul<$name> for $t - { - type Output = $name; - - #[inline] - fn mul(self, v: $name) -> $name - { - $name { - $( - $c: self * v.$c, - )+ - } - } - } - - impl ops::Div<$name> for $t - { - type Output = $name; - - #[inline] - fn div(self, v: $name) -> $name - { - $name { - $( - $c: self / v.$c, - )+ - } - } - } - - pub fn $sc($($c: $t),+) -> $name - { - $name { - $( - $c, - )+ - } - } - - unsafe impl cust::memory::DeviceCopy for $name { - // fn device_align() -> usize { - // $align - // } - } - }; - -} - -vec_impl!(V2i8: i8, v2i8, 1, (x, y)); -vec_impl!(V2i16: i16, v2i16, 2, (x, y)); -vec_impl!(V2i32: i32, v2i32, 8, (x, y)); -vec_impl!(V2i64: i64, v2i64, 8, (x, y)); -vec_impl!(V3i8: i8, v3i8, 1, (x, y, z)); -vec_impl!(V3i16: i16, v3i16, 2, (x, y, z)); -vec_impl!(V3i32: i32, v3i32, 4, (x, y, z)); -vec_impl!(V3i64: i64, v3i64, 8, (x, y, z)); -vec_impl!(V4i8: i8, v4i8, 1, (x, y, z, w)); -vec_impl!(V4i16: i16, v4i16, 2, (x, y, z, w)); -vec_impl!(V4i32: i32, v4i32, 16, (x, y, z, w)); -vec_impl!(V4i64: i64, v4i64, 8, (x, y, z, w)); - -vec_impl!(V2f32: f32, v2f32, 8, (x, y)); -vec_impl!(V2f64: f64, v2f64, 8, (x, y)); -vec_impl!(V3f32: f32, v3f32, 4, (x, y, z)); -vec_impl!(V3f64: f64, v3f64, 8, (x, y, z)); -vec_impl!(V4f32: f32, v4f32, 16, (x, y, z, w)); -vec_impl!(V4f64: f64, v4f64, 8, (x, y, z, w)); - -vec_impl!(P2f32: f32, p2f32, 8, (x, y)); -vec_impl!(P2f64: f64, p2f64, 8, (x, y)); -vec_impl!(P3f32: f32, p3f32, 4, (x, y, z)); -vec_impl!(P3f64: f64, p3f64, 8, (x, y, z)); -vec_impl!(P4f32: f32, p4f32, 16, (x, y, z, w)); -vec_impl!(P4f64: f64, p4f64, 8, (x, y, z, w)); - -vec_impl!(N2f32: f32, n2f32, 8, (x, y)); -vec_impl!(N2f64: f64, n2f64, 8, (x, y)); -vec_impl!(N3f32: f32, n3f32, 4, (x, y, z)); -vec_impl!(N3f64: f64, n3f64, 8, (x, y, z)); -vec_impl!(N4f32: f32, n4f32, 16, (x, y, z, w)); -vec_impl!(N4f64: f64, n4f64, 8, (x, y, z, w)); - -#[inline] -pub fn dot(a: &T, b: &T) -> T::Component { - a.dot(b) -} diff --git a/crates/optix/examples/ex04_mesh/Cargo.toml b/crates/optix/examples/ex04_mesh/Cargo.toml index f044c8a6..7bb3edc0 100644 --- a/crates/optix/examples/ex04_mesh/Cargo.toml +++ b/crates/optix/examples/ex04_mesh/Cargo.toml @@ -12,7 +12,7 @@ anyhow = "1.0.44" glfw = "0.42.0" gl = "0.14.0" num-traits = "0.2.14" -glam = { version = "0.30", features=["cuda"] } +glam = { version = "0.30", features = ["cuda"] } [build-dependencies] cuda_builder = { version = "0.3", path = "../../../cuda_builder" } diff --git a/crates/optix/examples/ex04_mesh/src/vector.rs b/crates/optix/examples/ex04_mesh/src/vector.rs deleted file mode 100644 index 589a4134..00000000 --- a/crates/optix/examples/ex04_mesh/src/vector.rs +++ /dev/null @@ -1,301 +0,0 @@ -use core::ops; -pub use num_traits::{One, Zero}; - -pub trait Scalar: num_traits::One + num_traits::Zero {} - -impl Scalar for i8 {} -impl Scalar for i16 {} -impl Scalar for i32 {} -impl Scalar for i64 {} -impl Scalar for f32 {} -impl Scalar for f64 {} - -pub trait Vector { - type Component: Scalar; - - fn dot(&self, v: &Self) -> Self::Component; - - #[inline] - fn length2(&self) -> Self::Component { - self.dot(&self) - } -} - -macro_rules! vec_impl { - ($name:ident: $t:ty, $sc:ident, $align:expr, ($($c:ident),+)) => { - #[repr(C)] - #[derive(Clone, Copy, Default, PartialEq, Debug)] - pub struct $name - { - $( - pub $c: $t, - )+ - } - - impl $name - { - pub fn new($($c: $t),+) -> Self - { - Self { - $( - $c, - )+ - } - } - } - - impl Vector for $name - { - type Component = $t; - - #[inline] - fn dot(&self, v: &Self) -> $t - { - <$t>::zero() $( - + self.$c * v.$c - )+ - } - } - - impl From<$t> for $name - { - fn from(x: $t) -> Self - { - Self { - $( - $c: x, - )+ - } - } - } - - impl ops::Neg for $name - { - type Output = Self; - - fn neg(self) -> Self - { - Self { - $( - $c: -self.$c, - )+ - } - } - } - - impl ops::Add for $name - { - type Output = Self; - - #[inline] - fn add(self, v: Self) -> Self - { - Self { - $( - $c: self.$c + v.$c, - )+ - } - } - } - - impl ops::AddAssign for $name - { - #[inline] - fn add_assign(&mut self, v: Self) - { - $( - self.$c += v.$c; - )+ - } - } - - impl ops::Sub for $name - { - type Output = Self; - - #[inline] - fn sub(self, v: Self) -> Self - { - Self { - $( - $c: self.$c - v.$c, - )+ - } - } - } - - impl ops::SubAssign for $name - { - #[inline] - fn sub_assign(&mut self, v: Self) - { - $( - self.$c -= v.$c; - )+ - } - } - - impl ops::Mul for $name - { - type Output = Self; - - #[inline] - fn mul(self, v: Self) -> Self - { - Self { - $( - $c: self.$c * v.$c, - )+ - } - } - } - - impl ops::MulAssign for $name - { - #[inline] - fn mul_assign(&mut self, v: Self) - { - $( - self.$c *= v.$c; - )+ - } - } - - impl ops::Mul<$t> for $name - { - type Output = Self; - - #[inline] - fn mul(self, v: $t) -> Self - { - Self { - $( - $c: self.$c * v, - )+ - } - } - } - - impl ops::MulAssign<$t> for $name - { - #[inline] - fn mul_assign(&mut self, v: $t) - { - $( - self.$c *= v; - )+ - } - } - - impl ops::Div<$t> for $name - { - type Output = Self; - - #[inline] - fn div(self, v: $t) -> Self - { - Self { - $( - $c: self.$c / v, - )+ - } - } - } - - impl ops::DivAssign<$t> for $name - { - #[inline] - fn div_assign(&mut self, v: $t) - { - $( - self.$c /= v; - )+ - } - } - - impl ops::Mul<$name> for $t - { - type Output = $name; - - #[inline] - fn mul(self, v: $name) -> $name - { - $name { - $( - $c: self * v.$c, - )+ - } - } - } - - impl ops::Div<$name> for $t - { - type Output = $name; - - #[inline] - fn div(self, v: $name) -> $name - { - $name { - $( - $c: self / v.$c, - )+ - } - } - } - - pub fn $sc($($c: $t),+) -> $name - { - $name { - $( - $c, - )+ - } - } - - unsafe impl cust::memory::DeviceCopy for $name { - // fn device_align() -> usize { - // $align - // } - } - }; - -} - -vec_impl!(V2i8: i8, v2i8, 1, (x, y)); -vec_impl!(V2i16: i16, v2i16, 2, (x, y)); -vec_impl!(V2i32: i32, v2i32, 8, (x, y)); -vec_impl!(V2i64: i64, v2i64, 8, (x, y)); -vec_impl!(V3i8: i8, v3i8, 1, (x, y, z)); -vec_impl!(V3i16: i16, v3i16, 2, (x, y, z)); -vec_impl!(V3i32: i32, v3i32, 4, (x, y, z)); -vec_impl!(V3i64: i64, v3i64, 8, (x, y, z)); -vec_impl!(V4i8: i8, v4i8, 1, (x, y, z, w)); -vec_impl!(V4i16: i16, v4i16, 2, (x, y, z, w)); -vec_impl!(V4i32: i32, v4i32, 16, (x, y, z, w)); -vec_impl!(V4i64: i64, v4i64, 8, (x, y, z, w)); - -vec_impl!(V2f32: f32, v2f32, 8, (x, y)); -vec_impl!(V2f64: f64, v2f64, 8, (x, y)); -vec_impl!(V3f32: f32, v3f32, 4, (x, y, z)); -vec_impl!(V3f64: f64, v3f64, 8, (x, y, z)); -vec_impl!(V4f32: f32, v4f32, 16, (x, y, z, w)); -vec_impl!(V4f64: f64, v4f64, 8, (x, y, z, w)); - -vec_impl!(P2f32: f32, p2f32, 8, (x, y)); -vec_impl!(P2f64: f64, p2f64, 8, (x, y)); -vec_impl!(P3f32: f32, p3f32, 4, (x, y, z)); -vec_impl!(P3f64: f64, p3f64, 8, (x, y, z)); -vec_impl!(P4f32: f32, p4f32, 16, (x, y, z, w)); -vec_impl!(P4f64: f64, p4f64, 8, (x, y, z, w)); - -vec_impl!(N2f32: f32, n2f32, 8, (x, y)); -vec_impl!(N2f64: f64, n2f64, 8, (x, y)); -vec_impl!(N3f32: f32, n3f32, 4, (x, y, z)); -vec_impl!(N3f64: f64, n3f64, 8, (x, y, z)); -vec_impl!(N4f32: f32, n4f32, 16, (x, y, z, w)); -vec_impl!(N4f64: f64, n4f64, 8, (x, y, z, w)); - -#[inline] -pub fn dot(a: &T, b: &T) -> T::Component { - a.dot(b) -} diff --git a/examples/optix/denoiser/Cargo.toml b/examples/optix/denoiser/Cargo.toml index 134beb20..92eb5177 100644 --- a/examples/optix/denoiser/Cargo.toml +++ b/examples/optix/denoiser/Cargo.toml @@ -6,6 +6,6 @@ edition = "2021" [dependencies] optix = { version = "0.1", path = "../../../crates/optix" } structopt = "0.3" -cust = { version = "0.3", path = "../../../crates/cust", features = ["impl_vek", "bytemuck"] } +cust = { version = "0.3", path = "../../../crates/cust", features = ["impl_glam", "bytemuck"] } image = "0.25.5" -vek = { version = "0.17.1", features = ["bytemuck"] } +glam = { version = "0.30", features = ["bytemuck"] } diff --git a/examples/optix/denoiser/src/main.rs b/examples/optix/denoiser/src/main.rs index 26755948..93fcd714 100644 --- a/examples/optix/denoiser/src/main.rs +++ b/examples/optix/denoiser/src/main.rs @@ -1,13 +1,13 @@ use cust::memory::DeviceBuffer; use cust::prelude::{Stream, StreamFlags}; use cust::util::SliceExt; +use glam::Vec3; use image::ImageReader; use optix::context::DeviceContext; use optix::denoiser::{Denoiser, DenoiserModelKind, DenoiserParams, Image, ImageFormat}; use std::error::Error; use std::path::PathBuf; use structopt::StructOpt; -use vek::{Clamp, Vec3}; #[derive(StructOpt)] #[structopt( @@ -31,7 +31,7 @@ fn main() -> Result<(), Box> { let img = ImageReader::open(opt.input)?.decode()?; let mut rgb = img.into_rgb8(); - let mut linear = vec![Vec3::::zero(); rgb.as_raw().len()]; + let mut linear = vec![Vec3::ZERO; rgb.as_raw().len() / 3]; let width = rgb.width(); let height = rgb.height(); @@ -39,8 +39,11 @@ fn main() -> Result<(), Box> { rgb.pixels() .zip(linear.iter_mut()) .for_each(|(rgb, linear)| { - let rgbvec = Vec3::::from(rgb.0); - *linear = rgbvec.numcast::().unwrap().map(|x| x / 255.0); + *linear = Vec3::new( + rgb.0[0] as f32 / 255.0, + rgb.0[1] as f32 / 255.0, + rgb.0[2] as f32 / 255.0, + ); }); // set up CUDA and OptiX then make the needed structs/contexts. @@ -61,7 +64,7 @@ fn main() -> Result<(), Box> { // allocate the buffer for the noisy image and copy the data to the GPU. let in_buf = linear.as_slice().as_dbuf()?; - let mut out_buf = DeviceBuffer::>::zeroed((width * height) as usize)?; + let mut out_buf = DeviceBuffer::::zeroed((width * height) as usize)?; // make an image to tell OptiX about how our image buffer is represented let input_image = Image::new(&in_buf, ImageFormat::Float3, width, height); @@ -87,8 +90,12 @@ fn main() -> Result<(), Box> { .into_iter() .zip(rgb.pixels_mut()) .for_each(|(linear, rgb)| { - let transformed = (linear * 255.0).clamped(0.0, 255.0); - rgb.0 = transformed.numcast().unwrap().into_array(); + let transformed = (linear * 255.0).clamp(Vec3::ZERO, Vec3::splat(255.0)); + rgb.0 = [ + transformed.x as u8, + transformed.y as u8, + transformed.z as u8, + ]; }); // ...and then save the image