Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
616  btVector3 solve33(const btVector3& b) const
617  {
618  btVector3 col1 = getColumn(0);
619  btVector3 col2 = getColumn(1);
620  btVector3 col3 = getColumn(2);
621 
622  btScalar det = btDot(col1, btCross(col2, col3));
623  if (btFabs(det)>SIMD_EPSILON)
624  {
625  det = 1.0f / det;
626  }
627  btVector3 x;
628  x[0] = det * btDot(b, btCross(col2, col3));
629  x[1] = det * btDot(col1, btCross(b, col3));
630  x[2] = det * btDot(col1, btCross(col2, b));
631  return x;
632  }
633 
634  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
635  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
636 
638  {
639  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
640  }
642  {
643  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
644  }
646  {
647  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
648  }
649 
650 
660  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
661  {
662  rot.setIdentity();
663  for (int step = maxSteps; step > 0; step--)
664  {
665  // find off-diagonal element [p][q] with largest magnitude
666  int p = 0;
667  int q = 1;
668  int r = 2;
669  btScalar max = btFabs(m_el[0][1]);
670  btScalar v = btFabs(m_el[0][2]);
671  if (v > max)
672  {
673  q = 2;
674  r = 1;
675  max = v;
676  }
677  v = btFabs(m_el[1][2]);
678  if (v > max)
679  {
680  p = 1;
681  q = 2;
682  r = 0;
683  max = v;
684  }
685 
686  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
687  if (max <= t)
688  {
689  if (max <= SIMD_EPSILON * t)
690  {
691  return;
692  }
693  step = 1;
694  }
695 
696  // compute Jacobi rotation J which leads to a zero for element [p][q]
697  btScalar mpq = m_el[p][q];
698  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
699  btScalar theta2 = theta * theta;
700  btScalar cos;
701  btScalar sin;
702  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
703  {
704  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
705  : 1 / (theta - btSqrt(1 + theta2));
706  cos = 1 / btSqrt(1 + t * t);
707  sin = cos * t;
708  }
709  else
710  {
711  // approximation for large theta-value, i.e., a nearly diagonal matrix
712  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
713  cos = 1 - btScalar(0.5) * t * t;
714  sin = cos * t;
715  }
716 
717  // apply rotation to matrix (this = J^T * this * J)
718  m_el[p][q] = m_el[q][p] = 0;
719  m_el[p][p] -= t * mpq;
720  m_el[q][q] += t * mpq;
721  btScalar mrp = m_el[r][p];
722  btScalar mrq = m_el[r][q];
723  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
724  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
725 
726  // apply rotation to rot (rot = rot * J)
727  for (int i = 0; i < 3; i++)
728  {
729  btVector3& row = rot[i];
730  mrp = row[p];
731  mrq = row[q];
732  row[p] = cos * mrp - sin * mrq;
733  row[q] = cos * mrq + sin * mrp;
734  }
735  }
736  }
737 
738 
739 
740 
748  btScalar cofac(int r1, int c1, int r2, int c2) const
749  {
750  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
751  }
752 
753  void serialize(struct btMatrix3x3Data& dataOut) const;
754 
755  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
756 
757  void deSerialize(const struct btMatrix3x3Data& dataIn);
758 
759  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
760 
761  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
762 
763 };
764 
765 
768 {
769 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
770  __m128 rv00, rv01, rv02;
771  __m128 rv10, rv11, rv12;
772  __m128 rv20, rv21, rv22;
773  __m128 mv0, mv1, mv2;
774 
775  rv02 = m_el[0].mVec128;
776  rv12 = m_el[1].mVec128;
777  rv22 = m_el[2].mVec128;
778 
779  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
780  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
781  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
782 
783  // rv0
784  rv00 = bt_splat_ps(rv02, 0);
785  rv01 = bt_splat_ps(rv02, 1);
786  rv02 = bt_splat_ps(rv02, 2);
787 
788  rv00 = _mm_mul_ps(rv00, mv0);
789  rv01 = _mm_mul_ps(rv01, mv1);
790  rv02 = _mm_mul_ps(rv02, mv2);
791 
792  // rv1
793  rv10 = bt_splat_ps(rv12, 0);
794  rv11 = bt_splat_ps(rv12, 1);
795  rv12 = bt_splat_ps(rv12, 2);
796 
797  rv10 = _mm_mul_ps(rv10, mv0);
798  rv11 = _mm_mul_ps(rv11, mv1);
799  rv12 = _mm_mul_ps(rv12, mv2);
800 
801  // rv2
802  rv20 = bt_splat_ps(rv22, 0);
803  rv21 = bt_splat_ps(rv22, 1);
804  rv22 = bt_splat_ps(rv22, 2);
805 
806  rv20 = _mm_mul_ps(rv20, mv0);
807  rv21 = _mm_mul_ps(rv21, mv1);
808  rv22 = _mm_mul_ps(rv22, mv2);
809 
810  rv00 = _mm_add_ps(rv00, rv01);
811  rv10 = _mm_add_ps(rv10, rv11);
812  rv20 = _mm_add_ps(rv20, rv21);
813 
814  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
815  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
816  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
817 
818 #elif defined(BT_USE_NEON)
819 
820  float32x4_t rv0, rv1, rv2;
821  float32x4_t v0, v1, v2;
822  float32x4_t mv0, mv1, mv2;
823 
824  v0 = m_el[0].mVec128;
825  v1 = m_el[1].mVec128;
826  v2 = m_el[2].mVec128;
827 
828  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
829  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
830  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
831 
832  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
833  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
834  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
835 
836  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
837  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
838  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
839 
840  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
841  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
842  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
843 
844  m_el[0].mVec128 = rv0;
845  m_el[1].mVec128 = rv1;
846  m_el[2].mVec128 = rv2;
847 #else
848  setValue(
849  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
850  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
851  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
852 #endif
853  return *this;
854 }
855 
858 {
859 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
860  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
861  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
862  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
863 #else
864  setValue(
865  m_el[0][0]+m.m_el[0][0],
866  m_el[0][1]+m.m_el[0][1],
867  m_el[0][2]+m.m_el[0][2],
868  m_el[1][0]+m.m_el[1][0],
869  m_el[1][1]+m.m_el[1][1],
870  m_el[1][2]+m.m_el[1][2],
871  m_el[2][0]+m.m_el[2][0],
872  m_el[2][1]+m.m_el[2][1],
873  m_el[2][2]+m.m_el[2][2]);
874 #endif
875  return *this;
876 }
877 
879 operator*(const btMatrix3x3& m, const btScalar & k)
880 {
881 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
882  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
883  return btMatrix3x3(
884  _mm_mul_ps(m[0].mVec128, vk),
885  _mm_mul_ps(m[1].mVec128, vk),
886  _mm_mul_ps(m[2].mVec128, vk));
887 #elif defined(BT_USE_NEON)
888  return btMatrix3x3(
889  vmulq_n_f32(m[0].mVec128, k),
890  vmulq_n_f32(m[1].mVec128, k),
891  vmulq_n_f32(m[2].mVec128, k));
892 #else
893  return btMatrix3x3(
894  m[0].x()*k,m[0].y()*k,m[0].z()*k,
895  m[1].x()*k,m[1].y()*k,m[1].z()*k,
896  m[2].x()*k,m[2].y()*k,m[2].z()*k);
897 #endif
898 }
899 
901 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
902 {
903 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
904  return btMatrix3x3(
905  m1[0].mVec128 + m2[0].mVec128,
906  m1[1].mVec128 + m2[1].mVec128,
907  m1[2].mVec128 + m2[2].mVec128);
908 #else
909  return btMatrix3x3(
910  m1[0][0]+m2[0][0],
911  m1[0][1]+m2[0][1],
912  m1[0][2]+m2[0][2],
913 
914  m1[1][0]+m2[1][0],
915  m1[1][1]+m2[1][1],
916  m1[1][2]+m2[1][2],
917 
918  m1[2][0]+m2[2][0],
919  m1[2][1]+m2[2][1],
920  m1[2][2]+m2[2][2]);
921 #endif
922 }
923 
925 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
926 {
927 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
928  return btMatrix3x3(
929  m1[0].mVec128 - m2[0].mVec128,
930  m1[1].mVec128 - m2[1].mVec128,
931  m1[2].mVec128 - m2[2].mVec128);
932 #else
933  return btMatrix3x3(
934  m1[0][0]-m2[0][0],
935  m1[0][1]-m2[0][1],
936  m1[0][2]-m2[0][2],
937 
938  m1[1][0]-m2[1][0],
939  m1[1][1]-m2[1][1],
940  m1[1][2]-m2[1][2],
941 
942  m1[2][0]-m2[2][0],
943  m1[2][1]-m2[2][1],
944  m1[2][2]-m2[2][2]);
945 #endif
946 }
947 
948 
951 {
952 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
953  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
954  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
955  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
956 #else
957  setValue(
958  m_el[0][0]-m.m_el[0][0],
959  m_el[0][1]-m.m_el[0][1],
960  m_el[0][2]-m.m_el[0][2],
961  m_el[1][0]-m.m_el[1][0],
962  m_el[1][1]-m.m_el[1][1],
963  m_el[1][2]-m.m_el[1][2],
964  m_el[2][0]-m.m_el[2][0],
965  m_el[2][1]-m.m_el[2][1],
966  m_el[2][2]-m.m_el[2][2]);
967 #endif
968  return *this;
969 }
970 
971 
974 {
975  return btTriple((*this)[0], (*this)[1], (*this)[2]);
976 }
977 
978 
981 {
982 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
983  return btMatrix3x3(
984  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
985  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
986  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
987 #elif defined(BT_USE_NEON)
988  return btMatrix3x3(
989  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
990  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
991  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
992 #else
993  return btMatrix3x3(
994  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
995  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
996  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
997 #endif
998 }
999 
1002 {
1003 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1004  __m128 v0 = m_el[0].mVec128;
1005  __m128 v1 = m_el[1].mVec128;
1006  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
1007  __m128 vT;
1008 
1009  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
1010 
1011  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
1012  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
1013 
1014  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
1015  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
1016  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
1017 
1018 
1019  return btMatrix3x3( v0, v1, v2 );
1020 #elif defined(BT_USE_NEON)
1021  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1022  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
1023  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1024  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
1025  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1026  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1027  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1028  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1029  return btMatrix3x3( v0, v1, v2 );
1030 #else
1031  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1032  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1033  m_el[0].z(), m_el[1].z(), m_el[2].z());
1034 #endif
1035 }
1036 
1039 {
1040  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1041  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1042  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1043 }
1044 
1047 {
1048  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1049  btScalar det = (*this)[0].dot(co);
1050  btFullAssert(det != btScalar(0.0));
1051  btScalar s = btScalar(1.0) / det;
1052  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1053  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1054  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1055 }
1056 
1059 {
1060 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1061  // zeros w
1062 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1063  __m128 row = m_el[0].mVec128;
1064  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1065  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1066  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1067  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1068  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1069  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1070  row = m_el[1].mVec128;
1071  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1072  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1073  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1074  row = m_el[2].mVec128;
1075  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1076  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1077  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1078  return btMatrix3x3( r0, r1, r2 );
1079 
1080 #elif defined BT_USE_NEON
1081  // zeros w
1082  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1083  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1084  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1085  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1086  float32x4_t row = m_el[0].mVec128;
1087  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1088  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1089  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1090  row = m_el[1].mVec128;
1091  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1092  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1093  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1094  row = m_el[2].mVec128;
1095  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1096  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1097  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1098  return btMatrix3x3( r0, r1, r2 );
1099 #else
1100  return btMatrix3x3(
1101  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1102  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1103  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1104  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1105  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1106  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1107  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1108  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1109  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1110 #endif
1111 }
1112 
1115 {
1116 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1117  __m128 a0 = m_el[0].mVec128;
1118  __m128 a1 = m_el[1].mVec128;
1119  __m128 a2 = m_el[2].mVec128;
1120 
1121  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1122  __m128 mx = mT[0].mVec128;
1123  __m128 my = mT[1].mVec128;
1124  __m128 mz = mT[2].mVec128;
1125 
1126  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1127  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1128  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1129  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1130  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1131  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1132  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1133  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1134  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1135  return btMatrix3x3( r0, r1, r2);
1136 
1137 #elif defined BT_USE_NEON
1138  float32x4_t a0 = m_el[0].mVec128;
1139  float32x4_t a1 = m_el[1].mVec128;
1140  float32x4_t a2 = m_el[2].mVec128;
1141 
1142  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1143  float32x4_t mx = mT[0].mVec128;
1144  float32x4_t my = mT[1].mVec128;
1145  float32x4_t mz = mT[2].mVec128;
1146 
1147  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1148  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1149  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1150  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1151  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1152  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1153  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1154  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1155  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1156  return btMatrix3x3( r0, r1, r2 );
1157 
1158 #else
1159  return btMatrix3x3(
1160  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1161  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1162  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1163 #endif
1164 }
1165 
1167 operator*(const btMatrix3x3& m, const btVector3& v)
1168 {
1169 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1170  return v.dot3(m[0], m[1], m[2]);
1171 #else
1172  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1173 #endif
1174 }
1175 
1176 
1178 operator*(const btVector3& v, const btMatrix3x3& m)
1179 {
1180 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1181 
1182  const __m128 vv = v.mVec128;
1183 
1184  __m128 c0 = bt_splat_ps( vv, 0);
1185  __m128 c1 = bt_splat_ps( vv, 1);
1186  __m128 c2 = bt_splat_ps( vv, 2);
1187 
1188  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1189  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1190  c0 = _mm_add_ps(c0, c1);
1191  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1192 
1193  return btVector3(_mm_add_ps(c0, c2));
1194 #elif defined(BT_USE_NEON)
1195  const float32x4_t vv = v.mVec128;
1196  const float32x2_t vlo = vget_low_f32(vv);
1197  const float32x2_t vhi = vget_high_f32(vv);
1198 
1199  float32x4_t c0, c1, c2;
1200 
1201  c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1202  c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1203  c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1204 
1205  c0 = vmulq_lane_f32(c0, vlo, 0);
1206  c1 = vmulq_lane_f32(c1, vlo, 1);
1207  c2 = vmulq_lane_f32(c2, vhi, 0);
1208  c0 = vaddq_f32(c0, c1);
1209  c0 = vaddq_f32(c0, c2);
1210 
1211  return btVector3(c0);
1212 #else
1213  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1214 #endif
1215 }
1216 
1218 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1219 {
1220 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1221 
1222  __m128 m10 = m1[0].mVec128;
1223  __m128 m11 = m1[1].mVec128;
1224  __m128 m12 = m1[2].mVec128;
1225 
1226  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1227 
1228  __m128 c0 = bt_splat_ps( m10, 0);
1229  __m128 c1 = bt_splat_ps( m11, 0);
1230  __m128 c2 = bt_splat_ps( m12, 0);
1231 
1232  c0 = _mm_mul_ps(c0, m2v);
1233  c1 = _mm_mul_ps(c1, m2v);
1234  c2 = _mm_mul_ps(c2, m2v);
1235 
1236  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1237 
1238  __m128 c0_1 = bt_splat_ps( m10, 1);
1239  __m128 c1_1 = bt_splat_ps( m11, 1);
1240  __m128 c2_1 = bt_splat_ps( m12, 1);
1241 
1242  c0_1 = _mm_mul_ps(c0_1, m2v);
1243  c1_1 = _mm_mul_ps(c1_1, m2v);
1244  c2_1 = _mm_mul_ps(c2_1, m2v);
1245 
1246  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1247 
1248  c0 = _mm_add_ps(c0, c0_1);
1249  c1 = _mm_add_ps(c1, c1_1);
1250  c2 = _mm_add_ps(c2, c2_1);
1251 
1252  m10 = bt_splat_ps( m10, 2);
1253  m11 = bt_splat_ps( m11, 2);
1254  m12 = bt_splat_ps( m12, 2);
1255 
1256  m10 = _mm_mul_ps(m10, m2v);
1257  m11 = _mm_mul_ps(m11, m2v);
1258  m12 = _mm_mul_ps(m12, m2v);
1259 
1260  c0 = _mm_add_ps(c0, m10);
1261  c1 = _mm_add_ps(c1, m11);
1262  c2 = _mm_add_ps(c2, m12);
1263 
1264  return btMatrix3x3(c0, c1, c2);
1265 
1266 #elif defined(BT_USE_NEON)
1267 
1268  float32x4_t rv0, rv1, rv2;
1269  float32x4_t v0, v1, v2;
1270  float32x4_t mv0, mv1, mv2;
1271 
1272  v0 = m1[0].mVec128;
1273  v1 = m1[1].mVec128;
1274  v2 = m1[2].mVec128;
1275 
1276  mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1277  mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1278  mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1279 
1280  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1281  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1282  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1283 
1284  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1285  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1286  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1287 
1288  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1289  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1290  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1291 
1292  return btMatrix3x3(rv0, rv1, rv2);
1293 
1294 #else
1295  return btMatrix3x3(
1296  m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1297  m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1298  m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1299 #endif
1300 }
1301 
1302 /*
1303 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1304 return btMatrix3x3(
1305 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1306 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1307 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1308 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1309 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1310 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1311 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1312 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1313 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1314 }
1315 */
1316 
1320 {
1321 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1322 
1323  __m128 c0, c1, c2;
1324 
1325  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1326  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1327  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1328 
1329  c0 = _mm_and_ps(c0, c1);
1330  c0 = _mm_and_ps(c0, c2);
1331 
1332  return (0x7 == _mm_movemask_ps((__m128)c0));
1333 #else
1334  return
1335  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1336  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1337  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1338 #endif
1339 }
1340 
1343 {
1345 };
1346 
1349 {
1351 };
1352 
1353 
1354 
1355 
1357 {
1358  for (int i=0;i<3;i++)
1359  m_el[i].serialize(dataOut.m_el[i]);
1360 }
1361 
1363 {
1364  for (int i=0;i<3;i++)
1365  m_el[i].serializeFloat(dataOut.m_el[i]);
1366 }
1367 
1368 
1370 {
1371  for (int i=0;i<3;i++)
1372  m_el[i].deSerialize(dataIn.m_el[i]);
1373 }
1374 
1376 {
1377  for (int i=0;i<3;i++)
1378  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1379 }
1380 
1382 {
1383  for (int i=0;i<3;i++)
1384  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1385 }
1386 
1387 #endif //BT_MATRIX3x3_H
1388 
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1375
#define SIMD_EPSILON
Definition: btScalar.h:494
for serialization
Definition: btMatrix3x3.h:1342
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1356
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1350
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1319
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:641
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:637
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1114
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:640
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:616
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1362
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:645
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:719
btScalar btSin(btScalar x)
Definition: btScalar.h:451
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btScalar btSqrt(btScalar y)
Definition: btScalar.h:418
unsigned int uint32_t
#define SIMD_FORCE_INLINE
Definition: btScalar.h:63
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:748
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:901
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:849
#define btFullAssert(x)
Definition: btScalar.h:116
#define SIMD_HALF_PI
Definition: btScalar.h:478
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:1001
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:919
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1369
#define SIMD_PI
Definition: btScalar.h:476
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1058
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:660
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1038
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1381
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:767
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:468
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:879
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:64
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:950
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:857
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:925
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:973
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:980
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:319
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:827
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
for serialization
Definition: btMatrix3x3.h:1348
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:48
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btScalar btAsin(btScalar x)
Definition: btScalar.h:460
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:889
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1046
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:925
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:278
btScalar btCos(btScalar x)
Definition: btScalar.h:450
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1344
btScalar btFabs(btScalar x)
Definition: btScalar.h:449
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298