diff options
Diffstat (limited to 'src/core/sse2/matrix.rs')
-rw-r--r-- | src/core/sse2/matrix.rs | 558 |
1 files changed, 0 insertions, 558 deletions
diff --git a/src/core/sse2/matrix.rs b/src/core/sse2/matrix.rs deleted file mode 100644 index 5850928..0000000 --- a/src/core/sse2/matrix.rs +++ /dev/null @@ -1,558 +0,0 @@ -#[cfg(target_arch = "x86")] -use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; - -use core::mem::MaybeUninit; - -use crate::core::{ - storage::{Align16, Columns2, Columns3, Columns4, XY, XYZ}, - traits::{ - matrix::{ - FloatMatrix2x2, FloatMatrix3x3, FloatMatrix4x4, Matrix, Matrix2x2, Matrix3x3, - Matrix4x4, MatrixConst, - }, - projection::ProjectionMatrix, - scalar::NanConstEx, - vector::{FloatVector4, Vector, Vector4, Vector4Const}, - }, -}; - -// __m128 as a Matrix2x2 -impl MatrixConst for __m128 { - const ZERO: __m128 = const_f32x4!([0.0, 0.0, 0.0, 0.0]); - const IDENTITY: __m128 = const_f32x4!([1.0, 0.0, 0.0, 1.0]); -} - -impl Matrix<f32> for __m128 {} - -impl Matrix2x2<f32, XY<f32>> for __m128 { - #[inline(always)] - fn new(m00: f32, m01: f32, m10: f32, m11: f32) -> Self { - unsafe { _mm_set_ps(m11, m10, m01, m00) } - } - - #[inline(always)] - fn from_cols(x_axis: XY<f32>, y_axis: XY<f32>) -> Self { - Matrix2x2::new(x_axis.x, x_axis.y, y_axis.x, y_axis.y) - } - - #[inline(always)] - fn x_axis(&self) -> &XY<f32> { - unsafe { &(*(self as *const Self).cast::<Columns2<XY<f32>>>()).x_axis } - } - - #[inline(always)] - fn y_axis(&self) -> &XY<f32> { - unsafe { &(*(self as *const Self).cast::<Columns2<XY<f32>>>()).y_axis } - } - - #[inline] - fn determinant(&self) -> f32 { - // self.x_axis.x * self.y_axis.y - self.x_axis.y * self.y_axis.x - unsafe { - let abcd = *self; - let dcba = _mm_shuffle_ps(abcd, abcd, 0b00_01_10_11); - let prod = _mm_mul_ps(abcd, dcba); - let det = _mm_sub_ps(prod, _mm_shuffle_ps(prod, prod, 0b01_01_01_01)); - _mm_cvtss_f32(det) - } - } - - #[inline(always)] - fn transpose(&self) -> Self { - unsafe { _mm_shuffle_ps(*self, *self, 0b11_01_10_00) } - } - - #[inline] - fn mul_vector(&self, other: XY<f32>) -> XY<f32> { - unsafe { - let abcd = *self; - let xxyy = _mm_set_ps(other.y, other.y, other.x, other.x); - let axbxcydy = _mm_mul_ps(abcd, xxyy); - let cydyaxbx = _mm_shuffle_ps(axbxcydy, axbxcydy, 0b01_00_11_10); - let result = _mm_add_ps(axbxcydy, cydyaxbx); - let mut out: MaybeUninit<Align16<XY<f32>>> = MaybeUninit::uninit(); - _mm_store_ps(out.as_mut_ptr().cast(), result); - out.assume_init().0 - } - } - - #[inline] - fn mul_matrix(&self, other: &Self) -> Self { - unsafe { - let abcd = *self; - let other = *other; - let xxyy0 = _mm_shuffle_ps(other, other, 0b01_01_00_00); - let xxyy1 = _mm_shuffle_ps(other, other, 0b11_11_10_10); - let axbxcydy0 = _mm_mul_ps(abcd, xxyy0); - let axbxcydy1 = _mm_mul_ps(abcd, xxyy1); - let cydyaxbx0 = _mm_shuffle_ps(axbxcydy0, axbxcydy0, 0b01_00_11_10); - let cydyaxbx1 = _mm_shuffle_ps(axbxcydy1, axbxcydy1, 0b01_00_11_10); - let result0 = _mm_add_ps(axbxcydy0, cydyaxbx0); - let result1 = _mm_add_ps(axbxcydy1, cydyaxbx1); - _mm_shuffle_ps(result0, result1, 0b01_00_01_00) - } - } - - #[inline] - fn mul_scalar(&self, other: f32) -> Self { - unsafe { _mm_mul_ps(*self, _mm_set_ps1(other)) } - } - - #[inline] - fn add_matrix(&self, other: &Self) -> Self { - unsafe { _mm_add_ps(*self, *other) } - } - - #[inline] - fn sub_matrix(&self, other: &Self) -> Self { - unsafe { _mm_sub_ps(*self, *other) } - } -} - -impl FloatMatrix2x2<f32, XY<f32>> for __m128 { - #[inline] - fn abs_diff_eq(&self, other: &Self, max_abs_diff: f32) -> bool { - FloatVector4::abs_diff_eq(*self, *other, max_abs_diff) - } - - #[inline] - fn inverse(&self) -> Self { - unsafe { - const SIGN: __m128 = const_f32x4!([1.0, -1.0, -1.0, 1.0]); - let abcd = *self; - let dcba = _mm_shuffle_ps(abcd, abcd, 0b00_01_10_11); - let prod = _mm_mul_ps(abcd, dcba); - let sub = _mm_sub_ps(prod, _mm_shuffle_ps(prod, prod, 0b01_01_01_01)); - let det = _mm_shuffle_ps(sub, sub, 0b00_00_00_00); - let tmp = _mm_div_ps(SIGN, det); - glam_assert!(tmp.is_finite()); - let dbca = _mm_shuffle_ps(abcd, abcd, 0b00_10_01_11); - _mm_mul_ps(dbca, tmp) - } - } - - #[inline] - fn neg_matrix(&self) -> Self { - unsafe { _mm_xor_ps(*self, _mm_set1_ps(-0.0)) } - } -} - -impl MatrixConst for Columns3<__m128> { - const ZERO: Columns3<__m128> = Columns3 { - x_axis: __m128::ZERO, - y_axis: __m128::ZERO, - z_axis: __m128::ZERO, - }; - const IDENTITY: Columns3<__m128> = Columns3 { - x_axis: __m128::X, - y_axis: __m128::Y, - z_axis: __m128::Z, - }; -} - -impl NanConstEx for Columns3<__m128> { - const NAN: Columns3<__m128> = Columns3 { - x_axis: __m128::NAN, - y_axis: __m128::NAN, - z_axis: __m128::NAN, - }; -} - -impl Matrix<f32> for Columns3<__m128> {} - -impl Matrix3x3<f32, __m128> for Columns3<__m128> { - #[inline(always)] - fn from_cols(x_axis: __m128, y_axis: __m128, z_axis: __m128) -> Self { - Self { - x_axis, - y_axis, - z_axis, - } - } - - #[inline(always)] - fn x_axis(&self) -> &__m128 { - &self.x_axis - } - - #[inline(always)] - fn y_axis(&self) -> &__m128 { - &self.y_axis - } - - #[inline(always)] - fn z_axis(&self) -> &__m128 { - &self.z_axis - } - - #[inline] - fn transpose(&self) -> Self { - unsafe { - let tmp0 = _mm_shuffle_ps(self.x_axis, self.y_axis, 0b01_00_01_00); - let tmp1 = _mm_shuffle_ps(self.x_axis, self.y_axis, 0b11_10_11_10); - - Self { - x_axis: _mm_shuffle_ps(tmp0, self.z_axis, 0b00_00_10_00), - y_axis: _mm_shuffle_ps(tmp0, self.z_axis, 0b01_01_11_01), - z_axis: _mm_shuffle_ps(tmp1, self.z_axis, 0b10_10_10_00), - } - } - } -} - -impl FloatMatrix3x3<f32, __m128> for Columns3<__m128> { - #[inline] - fn transform_point2(&self, other: XY<f32>) -> XY<f32> { - let mut res = self.x_axis.mul_scalar(other.x); - res = self.y_axis.mul_scalar(other.y).add(res); - res = self.z_axis.add(res); - res.into() - } - - #[inline] - fn transform_vector2(&self, other: XY<f32>) -> XY<f32> { - let mut res = self.x_axis.mul_scalar(other.x); - res = self.y_axis.mul_scalar(other.y).add(res); - res.into() - } -} - -impl MatrixConst for Columns4<__m128> { - const ZERO: Columns4<__m128> = Columns4 { - x_axis: __m128::ZERO, - y_axis: __m128::ZERO, - z_axis: __m128::ZERO, - w_axis: __m128::ZERO, - }; - const IDENTITY: Columns4<__m128> = Columns4 { - x_axis: __m128::X, - y_axis: __m128::Y, - z_axis: __m128::Z, - w_axis: __m128::W, - }; -} - -impl NanConstEx for Columns4<__m128> { - const NAN: Columns4<__m128> = Columns4 { - x_axis: __m128::NAN, - y_axis: __m128::NAN, - z_axis: __m128::NAN, - w_axis: __m128::NAN, - }; -} - -impl Matrix<f32> for Columns4<__m128> {} - -impl Matrix4x4<f32, __m128> for Columns4<__m128> { - #[inline(always)] - fn from_cols(x_axis: __m128, y_axis: __m128, z_axis: __m128, w_axis: __m128) -> Self { - Self { - x_axis, - y_axis, - z_axis, - w_axis, - } - } - - #[inline(always)] - fn x_axis(&self) -> &__m128 { - &self.x_axis - } - - #[inline(always)] - fn y_axis(&self) -> &__m128 { - &self.y_axis - } - - #[inline(always)] - fn z_axis(&self) -> &__m128 { - &self.z_axis - } - - #[inline(always)] - fn w_axis(&self) -> &__m128 { - &self.w_axis - } - - #[inline] - fn determinant(&self) -> f32 { - unsafe { - // SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3]; - // SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3]; - // SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2]; - // SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3]; - // SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2]; - // SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1]; - - // Based on https://github.com/g-truc/glm `glm_mat4_determinant_lowp` - let swp2a = _mm_shuffle_ps(self.z_axis, self.z_axis, 0b00_01_01_10); - let swp3a = _mm_shuffle_ps(self.w_axis, self.w_axis, 0b11_10_11_11); - let swp2b = _mm_shuffle_ps(self.z_axis, self.z_axis, 0b11_10_11_11); - let swp3b = _mm_shuffle_ps(self.w_axis, self.w_axis, 0b00_01_01_10); - let swp2c = _mm_shuffle_ps(self.z_axis, self.z_axis, 0b00_00_01_10); - let swp3c = _mm_shuffle_ps(self.w_axis, self.w_axis, 0b01_10_00_00); - - let mula = _mm_mul_ps(swp2a, swp3a); - let mulb = _mm_mul_ps(swp2b, swp3b); - let mulc = _mm_mul_ps(swp2c, swp3c); - let sube = _mm_sub_ps(mula, mulb); - let subf = _mm_sub_ps(_mm_movehl_ps(mulc, mulc), mulc); - - let subfaca = _mm_shuffle_ps(sube, sube, 0b10_01_00_00); - let swpfaca = _mm_shuffle_ps(self.y_axis, self.y_axis, 0b00_00_00_01); - let mulfaca = _mm_mul_ps(swpfaca, subfaca); - - let subtmpb = _mm_shuffle_ps(sube, subf, 0b00_00_11_01); - let subfacb = _mm_shuffle_ps(subtmpb, subtmpb, 0b11_01_01_00); - let swpfacb = _mm_shuffle_ps(self.y_axis, self.y_axis, 0b01_01_10_10); - let mulfacb = _mm_mul_ps(swpfacb, subfacb); - - let subres = _mm_sub_ps(mulfaca, mulfacb); - let subtmpc = _mm_shuffle_ps(sube, subf, 0b01_00_10_10); - let subfacc = _mm_shuffle_ps(subtmpc, subtmpc, 0b11_11_10_00); - let swpfacc = _mm_shuffle_ps(self.y_axis, self.y_axis, 0b10_11_11_11); - let mulfacc = _mm_mul_ps(swpfacc, subfacc); - - let addres = _mm_add_ps(subres, mulfacc); - let detcof = _mm_mul_ps(addres, _mm_setr_ps(1.0, -1.0, 1.0, -1.0)); - - Vector4::dot(self.x_axis, detcof) - } - } - - #[inline] - fn transpose(&self) -> Self { - unsafe { - // Based on https://github.com/microsoft/DirectXMath `XMMatrixTranspose` - let tmp0 = _mm_shuffle_ps(self.x_axis, self.y_axis, 0b01_00_01_00); - let tmp1 = _mm_shuffle_ps(self.x_axis, self.y_axis, 0b11_10_11_10); - let tmp2 = _mm_shuffle_ps(self.z_axis, self.w_axis, 0b01_00_01_00); - let tmp3 = _mm_shuffle_ps(self.z_axis, self.w_axis, 0b11_10_11_10); - - Self { - x_axis: _mm_shuffle_ps(tmp0, tmp2, 0b10_00_10_00), - y_axis: _mm_shuffle_ps(tmp0, tmp2, 0b11_01_11_01), - z_axis: _mm_shuffle_ps(tmp1, tmp3, 0b10_00_10_00), - w_axis: _mm_shuffle_ps(tmp1, tmp3, 0b11_01_11_01), - } - } - } -} - -impl FloatMatrix4x4<f32, __m128> for Columns4<__m128> { - type SIMDVector3 = __m128; - - fn inverse(&self) -> Self { - unsafe { - // Based on https://github.com/g-truc/glm `glm_mat4_inverse` - let fac0 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b11_11_11_11); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b10_10_10_10); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b10_10_10_10); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b11_11_11_11); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let fac1 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b11_11_11_11); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b01_01_01_01); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b01_01_01_01); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b11_11_11_11); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let fac2 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b10_10_10_10); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b01_01_01_01); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b01_01_01_01); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b10_10_10_10); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let fac3 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b11_11_11_11); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b00_00_00_00); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b00_00_00_00); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b11_11_11_11); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let fac4 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b10_10_10_10); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b00_00_00_00); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b00_00_00_00); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b10_10_10_10); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let fac5 = { - let swp0a = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b01_01_01_01); - let swp0b = _mm_shuffle_ps(self.w_axis, self.z_axis, 0b00_00_00_00); - - let swp00 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b00_00_00_00); - let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00); - let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00); - let swp03 = _mm_shuffle_ps(self.z_axis, self.y_axis, 0b01_01_01_01); - - let mul00 = _mm_mul_ps(swp00, swp01); - let mul01 = _mm_mul_ps(swp02, swp03); - _mm_sub_ps(mul00, mul01) - }; - let sign_a = _mm_set_ps(1.0, -1.0, 1.0, -1.0); - let sign_b = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); - - let temp0 = _mm_shuffle_ps(self.y_axis, self.x_axis, 0b00_00_00_00); - let vec0 = _mm_shuffle_ps(temp0, temp0, 0b10_10_10_00); - - let temp1 = _mm_shuffle_ps(self.y_axis, self.x_axis, 0b01_01_01_01); - let vec1 = _mm_shuffle_ps(temp1, temp1, 0b10_10_10_00); - - let temp2 = _mm_shuffle_ps(self.y_axis, self.x_axis, 0b10_10_10_10); - let vec2 = _mm_shuffle_ps(temp2, temp2, 0b10_10_10_00); - - let temp3 = _mm_shuffle_ps(self.y_axis, self.x_axis, 0b11_11_11_11); - let vec3 = _mm_shuffle_ps(temp3, temp3, 0b10_10_10_00); - - let mul00 = _mm_mul_ps(vec1, fac0); - let mul01 = _mm_mul_ps(vec2, fac1); - let mul02 = _mm_mul_ps(vec3, fac2); - let sub00 = _mm_sub_ps(mul00, mul01); - let add00 = _mm_add_ps(sub00, mul02); - let inv0 = _mm_mul_ps(sign_b, add00); - - let mul03 = _mm_mul_ps(vec0, fac0); - let mul04 = _mm_mul_ps(vec2, fac3); - let mul05 = _mm_mul_ps(vec3, fac4); - let sub01 = _mm_sub_ps(mul03, mul04); - let add01 = _mm_add_ps(sub01, mul05); - let inv1 = _mm_mul_ps(sign_a, add01); - - let mul06 = _mm_mul_ps(vec0, fac1); - let mul07 = _mm_mul_ps(vec1, fac3); - let mul08 = _mm_mul_ps(vec3, fac5); - let sub02 = _mm_sub_ps(mul06, mul07); - let add02 = _mm_add_ps(sub02, mul08); - let inv2 = _mm_mul_ps(sign_b, add02); - - let mul09 = _mm_mul_ps(vec0, fac2); - let mul10 = _mm_mul_ps(vec1, fac4); - let mul11 = _mm_mul_ps(vec2, fac5); - let sub03 = _mm_sub_ps(mul09, mul10); - let add03 = _mm_add_ps(sub03, mul11); - let inv3 = _mm_mul_ps(sign_a, add03); - - let row0 = _mm_shuffle_ps(inv0, inv1, 0b00_00_00_00); - let row1 = _mm_shuffle_ps(inv2, inv3, 0b00_00_00_00); - let row2 = _mm_shuffle_ps(row0, row1, 0b10_00_10_00); - - let dot0 = Vector4::dot(self.x_axis, row2); - glam_assert!(dot0 != 0.0); - - let rcp0 = _mm_set1_ps(dot0.recip()); - - Self { - x_axis: _mm_mul_ps(inv0, rcp0), - y_axis: _mm_mul_ps(inv1, rcp0), - z_axis: _mm_mul_ps(inv2, rcp0), - w_axis: _mm_mul_ps(inv3, rcp0), - } - } - } - - #[inline(always)] - fn transform_point3(&self, other: XYZ<f32>) -> XYZ<f32> { - self.x_axis - .mul_scalar(other.x) - .add(self.y_axis.mul_scalar(other.y)) - .add(self.z_axis.mul_scalar(other.z)) - .add(self.w_axis) - .into() - } - - #[inline(always)] - fn transform_vector3(&self, other: XYZ<f32>) -> XYZ<f32> { - self.x_axis - .mul_scalar(other.x) - .add(self.y_axis.mul_scalar(other.y)) - .add(self.z_axis.mul_scalar(other.z)) - .into() - } - - #[inline] - fn transform_float4_as_point3(&self, other: __m128) -> __m128 { - let mut res = self.x_axis.mul(Vector4::splat_x(other)); - res = self.y_axis.mul_add(Vector4::splat_y(other), res); - res = self.z_axis.mul_add(Vector4::splat_z(other), res); - res = self.w_axis.add(res); - res - } - - #[inline] - fn transform_float4_as_vector3(&self, other: __m128) -> __m128 { - let mut res = self.x_axis.mul(Vector4::splat_x(other)); - res = self.y_axis.mul_add(Vector4::splat_y(other), res); - res = self.z_axis.mul_add(Vector4::splat_z(other), res); - res - } - - #[inline] - fn project_float4_as_point3(&self, other: __m128) -> __m128 { - let mut res = self.x_axis.mul(Vector4::splat_x(other)); - res = self.y_axis.mul_add(Vector4::splat_y(other), res); - res = self.z_axis.mul_add(Vector4::splat_z(other), res); - res = self.w_axis.add(res); - res = res.mul(res.splat_w().recip()); - res - } -} - -impl ProjectionMatrix<f32, __m128> for Columns4<__m128> {} - -impl From<Columns3<XYZ<f32>>> for Columns3<__m128> { - #[inline(always)] - fn from(v: Columns3<XYZ<f32>>) -> Columns3<__m128> { - Self { - x_axis: v.x_axis.into(), - y_axis: v.y_axis.into(), - z_axis: v.z_axis.into(), - } - } -} - -impl From<Columns3<__m128>> for Columns3<XYZ<f32>> { - #[inline(always)] - fn from(v: Columns3<__m128>) -> Columns3<XYZ<f32>> { - Self { - x_axis: v.x_axis.into(), - y_axis: v.y_axis.into(), - z_axis: v.z_axis.into(), - } - } -} |