diff --git a/config/GMPE01_00/rels/m420dll/symbols.txt b/config/GMPE01_00/rels/m420dll/symbols.txt index 8e6b2a20..365355d3 100644 --- a/config/GMPE01_00/rels/m420dll/symbols.txt +++ b/config/GMPE01_00/rels/m420dll/symbols.txt @@ -207,9 +207,9 @@ lbl_1_data_0 = .data:0x00000000; // type:object size:0x1E scope:local data:strin lbl_1_data_1E = .data:0x0000001E; // type:object size:0x1E scope:local data:string lbl_1_data_3C = .data:0x0000003C; // type:object size:0x11 scope:local data:string lbl_1_data_50 = .data:0x00000050; // type:object size:0x80 -lbl_1_data_D0 = .data:0x000000D0; // type:object size:0x1B data:string +lbl_1_data_D0 = .data:0x000000D0; // type:object size:0x1B scope:local data:string lbl_1_data_EC = .data:0x000000EC; // type:object size:0x30 -lbl_1_data_11C = .data:0x0000011C; // type:object size:0x19 data:string +lbl_1_data_11C = .data:0x0000011C; // type:object size:0x19 scope:local data:string lbl_1_data_135 = .data:0x00000135; // type:object size:0x1 lbl_1_data_138 = .data:0x00000138; // type:object size:0x1C lbl_1_data_154 = .data:0x00000154; // type:object size:0x10 data:4byte diff --git a/config/GMPE01_00/splits.txt b/config/GMPE01_00/splits.txt index 7eefd3c3..6b38187f 100644 --- a/config/GMPE01_00/splits.txt +++ b/config/GMPE01_00/splits.txt @@ -680,12 +680,12 @@ dolphin/mtx/vec.c: .sdata2 start:0x801D6380 end:0x801D6390 dolphin/mtx/quat.c: - .text start:0x800BC2C4 end:0x800BC710 + .text start:0x800BC2C4 end:0x800BC884 .rodata start:0x8011E430 end:0x8011E440 .sdata2 start:0x801D6390 end:0x801D63B8 dolphin/mtx/psmtx.c: - .text start:0x800BC710 end:0x800BC9E8 + .text start:0x800BC884 end:0x800BC9E8 dolphin/dvd/dvdlow.c: .text start:0x800BC9E8 end:0x800BD83C @@ -742,7 +742,7 @@ dolphin/demo/DEMOPuts.c: dolphin/demo/DEMOStats.c: .text start:0x800C2F84 end:0x800C3CDC - .data start:0x8013D5E0 end:0x8013D748 + .data start:0x8013D5E0 end:0x8013D738 .sdata start:0x801D3908 end:0x801D3910 .sbss start:0x801D4488 end:0x801D44E8 .sdata2 start:0x801D63D8 
end:0x801D63E8 @@ -753,6 +753,7 @@ dolphin/pad/Padclamp.c: dolphin/pad/Pad.c: .text start:0x800C3F14 end:0x800C59DC + .data start:0x8013D738 end:0x8013D748 .bss start:0x801A61B0 end:0x801A6200 .sdata start:0x801D3918 end:0x801D3938 .sbss start:0x801D44E8 end:0x801D4510 diff --git a/config/GMPE01_00/symbols.txt b/config/GMPE01_00/symbols.txt index 1c59588e..99198763 100644 --- a/config/GMPE01_00/symbols.txt +++ b/config/GMPE01_00/symbols.txt @@ -4345,10 +4345,10 @@ lbl_8013C2F0 = .data:0x8013C2F0; // type:object size:0x78 YearDays = .data:0x8013C368; // type:object size:0x30 scope:local LeapYearDays = .data:0x8013C398; // type:object size:0x30 scope:local lbl_8013C3C8 = .data:0x8013C3C8; // type:object size:0x18 scope:local data:string -lbl_8013C3E0 = .data:0x8013C3E0; // type:object size:0xC8 data:string -lbl_8013C4A8 = .data:0x8013C4A8; // type:object size:0x38 -lbl_8013C4E0 = .data:0x8013C4E0; // type:object size:0x34 data:string -lbl_8013C514 = .data:0x8013C514; // type:object size:0x34 +lbl_8013C3E0 = .data:0x8013C3E0; // type:object size:0xC8 scope:local data:string +lbl_8013C4A8 = .data:0x8013C4A8; // type:object size:0x38 scope:local +lbl_8013C4E0 = .data:0x8013C4E0; // type:object size:0x34 scope:local data:string +lbl_8013C514 = .data:0x8013C514; // type:object size:0x34 scope:local @13 = .data:0x8013C548; // type:object size:0x15 scope:local data:string @293 = .data:0x8013C594; // type:object size:0x20 scope:local jumptable_8013C5B4 = .data:0x8013C5B4; // type:object size:0x40 scope:local @@ -4373,7 +4373,7 @@ jumptable_8013D698 = .data:0x8013D698; // type:object size:0x28 scope:local jumptable_8013D6C0 = .data:0x8013D6C0; // type:object size:0x28 scope:local jumptable_8013D6E8 = .data:0x8013D6E8; // type:object size:0x28 scope:local jumptable_8013D710 = .data:0x8013D710; // type:object size:0x28 scope:local -lbl_8013D738 = .data:0x8013D738; // type:object size:0x10 +ResetFunctionInfo = .data:0x8013D738; // type:object size:0x10 lbl_8013D748 = 
.data:0x8013D748; // type:object size:0x38 lbl_8013D780 = .data:0x8013D780; // type:object size:0x140 jumptable_8013D8C0 = .data:0x8013D8C0; // type:object size:0x68 scope:local @@ -4976,8 +4976,8 @@ __OSArenaLo = .sdata:0x801D38C0; // type:object size:0x4 scope:local data:4byte @32 = .sdata:0x801D38C8; // type:object size:0x2 scope:local data:string fontEncode$80 = .sdata:0x801D38D0; // type:object size:0x8 scope:local data:2byte Unit01 = .sdata:0x801D38D8; // type:object size:0x8 -FirstRead = .sdata:0x801D38E0; // type:object size:0x8 scope:local data:4byte -lbl_801D38E8 = .sdata:0x801D38E8; // type:object size:0x8 data:string +FirstRead = .sdata:0x801D38E0; // type:object size:0x4 scope:local data:4byte +lbl_801D38E8 = .sdata:0x801D38E8; // type:object size:0x8 scope:local data:string autoInvalidation = .sdata:0x801D38F0; // type:object size:0x4 scope:local data:4byte @35 = .sdata:0x801D38F8; // type:object size:0x2 scope:local data:string @40 = .sdata:0x801D38FC; // type:object size:0x4 scope:local data:string @@ -7284,21 +7284,21 @@ lbl_801D6338 = .sdata2:0x801D6338; // type:object size:0x4 data:float lbl_801D633C = .sdata2:0x801D633C; // type:object size:0x4 data:float lbl_801D6340 = .sdata2:0x801D6340; // type:object size:0x8 data:double lbl_801D6348 = .sdata2:0x801D6348; // type:object size:0x8 data:float -lbl_801D6350 = .sdata2:0x801D6350; // type:object size:0x4 data:float -lbl_801D6354 = .sdata2:0x801D6354; // type:object size:0x4 data:float -lbl_801D6358 = .sdata2:0x801D6358; // type:object size:0x4 data:float -lbl_801D635C = .sdata2:0x801D635C; // type:object size:0x4 data:float -lbl_801D6360 = .sdata2:0x801D6360; // type:object size:0x8 data:float -lbl_801D6368 = .sdata2:0x801D6368; // type:object size:0x4 data:float -lbl_801D636C = .sdata2:0x801D636C; // type:object size:0x4 data:float -lbl_801D6370 = .sdata2:0x801D6370; // type:object size:0x4 data:float -lbl_801D6374 = .sdata2:0x801D6374; // type:object size:0x4 data:float -lbl_801D6378 = 
.sdata2:0x801D6378; // type:object size:0x4 data:float -lbl_801D637C = .sdata2:0x801D637C; // type:object size:0x4 data:float -lbl_801D6380 = .sdata2:0x801D6380; // type:object size:0x4 data:float -lbl_801D6384 = .sdata2:0x801D6384; // type:object size:0x4 data:float -lbl_801D6388 = .sdata2:0x801D6388; // type:object size:0x4 data:float -lbl_801D638C = .sdata2:0x801D638C; // type:object size:0x4 data:float +lbl_801D6350 = .sdata2:0x801D6350; // type:object size:0x4 scope:local data:float +lbl_801D6354 = .sdata2:0x801D6354; // type:object size:0x4 scope:local data:float +lbl_801D6358 = .sdata2:0x801D6358; // type:object size:0x4 scope:local data:float +lbl_801D635C = .sdata2:0x801D635C; // type:object size:0x4 scope:local data:float +lbl_801D6360 = .sdata2:0x801D6360; // type:object size:0x8 scope:local data:float +lbl_801D6368 = .sdata2:0x801D6368; // type:object size:0x4 scope:local data:float +lbl_801D636C = .sdata2:0x801D636C; // type:object size:0x4 scope:local data:float +lbl_801D6370 = .sdata2:0x801D6370; // type:object size:0x4 scope:local data:float +lbl_801D6374 = .sdata2:0x801D6374; // type:object size:0x4 scope:local data:float +lbl_801D6378 = .sdata2:0x801D6378; // type:object size:0x4 scope:local data:float +lbl_801D637C = .sdata2:0x801D637C; // type:object size:0x4 scope:local data:float +lbl_801D6380 = .sdata2:0x801D6380; // type:object size:0x4 scope:local data:float +lbl_801D6384 = .sdata2:0x801D6384; // type:object size:0x4 scope:local data:float +lbl_801D6388 = .sdata2:0x801D6388; // type:object size:0x4 scope:local data:float +lbl_801D638C = .sdata2:0x801D638C; // type:object size:0x4 scope:local data:float lbl_801D6390 = .sdata2:0x801D6390; // type:object size:0x4 data:float lbl_801D6394 = .sdata2:0x801D6394; // type:object size:0x4 data:float lbl_801D6398 = .sdata2:0x801D6398; // type:object size:0x8 data:double diff --git a/configure.py b/configure.py index 67f32c23..cc6076bf 100644 --- a/configure.py +++ b/configure.py @@ -484,11 +484,11 @@ 
config.libs = [ "mtx", [ Object(NonMatching, "dolphin/mtx/mtx.c"), - Object(NonMatching, "dolphin/mtx/mtxvec.c"), - Object(NonMatching, "dolphin/mtx/mtx44.c"), + Object(Matching, "dolphin/mtx/mtxvec.c"), + Object(Matching, "dolphin/mtx/mtx44.c"), Object(NonMatching, "dolphin/mtx/vec.c"), Object(NonMatching, "dolphin/mtx/quat.c"), - Object(NonMatching, "dolphin/mtx/psmtx.c"), + Object(Matching, "dolphin/mtx/psmtx.c"), ], ), DolphinLib( @@ -521,7 +521,7 @@ config.libs = [ "pad", [ Object(NonMatching, "dolphin/pad/Padclamp.c"), - Object(NonMatching, "dolphin/pad/Pad.c"), + Object(Matching, "dolphin/pad/Pad.c"), ], ), DolphinLib( diff --git a/src/dolphin/mtx/mtx.c b/src/dolphin/mtx/mtx.c new file mode 100644 index 00000000..a8f2bb06 --- /dev/null +++ b/src/dolphin/mtx/mtx.c @@ -0,0 +1,1319 @@ +#include "dolphin/mtx.h" + +static f32 Unit01[] = { 0.0f, 1.0f }; + +extern f32 sinf(f32); + +/* Load the 3x4 identity into mtx: 1.0f on the diagonal and 0.0f in every other element, including column 3, which C_MTXCopy and PSMTXIdentity below both treat as part of the matrix. */ +void C_MTXIdentity(Mtx mtx) +{ + mtx[0][0] = 1.0f; + mtx[0][1] = 0.0f; + mtx[0][2] = 0.0f; + mtx[0][3] = 0.0f; + mtx[1][0] = 0.0f; + mtx[1][1] = 1.0f; + mtx[1][2] = 0.0f; + mtx[1][3] = 0.0f; + mtx[2][0] = 0.0f; + mtx[2][1] = 0.0f; + mtx[2][2] = 1.0f; + mtx[2][3] = 0.0f; +} + +#ifdef GEKKO +void PSMTXIdentity(register Mtx m) +{ + register f32 zero_c = 0.0f; + register f32 one_c = 1.0f; + register f32 c_01; + register f32 c_10; + // clang-format off + asm { + psq_st zero_c, 8(m), 0, 0 + ps_merge01 c_01, zero_c, one_c + psq_st zero_c, 24(m), 0, 0 + ps_merge10 c_10, one_c, zero_c + psq_st zero_c, 32(m), 0, 0 + psq_st c_01, 16(m), 0, 0 + psq_st c_10, 0(m), 0, 0 + psq_st c_10, 40(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXCopy(const Mtx src, Mtx dst) +{ + + if (src == dst) { + return; + } + + dst[0][0] = src[0][0]; + dst[0][1] = src[0][1]; + dst[0][2] = src[0][2]; + dst[0][3] = src[0][3]; + + dst[1][0] = src[1][0]; + dst[1][1] = src[1][1]; + dst[1][2] = src[1][2]; + dst[1][3] = src[1][3]; + + dst[2][0] = src[2][0]; + dst[2][1] = src[2][1]; + dst[2][2] = src[2][2]; + dst[2][3] = src[2][3]; +} + +#ifdef GEKKO +asm void PSMTXCopy(const 
register Mtx src, register Mtx dst) +{ + // clang-format off + nofralloc + + psq_l fp0, 0(src), 0, 0 + psq_st fp0, 0(dst), 0, 0 + psq_l fp1, 8(src), 0, 0 + psq_st fp1, 8(dst), 0, 0 + psq_l fp2, 16(src), 0, 0 + psq_st fp2, 16(dst), 0, 0 + psq_l fp3, 24(src), 0, 0 + psq_st fp3, 24(dst), 0, 0 + psq_l fp4, 32(src), 0, 0 + psq_st fp4, 32(dst), 0, 0 + psq_l fp5, 40(src), 0, 0 + psq_st fp5, 40(dst), 0, 0 + + blr + // clang-format on +} +#endif + +void C_MTXConcat(const Mtx a, const Mtx b, Mtx ab) /* Writes the product a * b into ab. Only rows 0-2 of b are read; the last term of each column-3 entry adds a[i][3] directly. ab may be the same matrix as a or b. */ +{ + Mtx mTmp; /* scratch result, used only when ab aliases an input */ + MtxPtr m; /* where the product is actually built */ + + if ((ab == a) || (ab == b)) { + m = mTmp; /* aliasing: build in scratch so the inputs are not overwritten while still being read */ + } + + else { + m = ab; /* no aliasing: write straight into the destination */ + } + + m[0][0] = a[0][0] * b[0][0] + a[0][1] * b[1][0] + a[0][2] * b[2][0]; + m[0][1] = a[0][0] * b[0][1] + a[0][1] * b[1][1] + a[0][2] * b[2][1]; + m[0][2] = a[0][0] * b[0][2] + a[0][1] * b[1][2] + a[0][2] * b[2][2]; + m[0][3] = a[0][0] * b[0][3] + a[0][1] * b[1][3] + a[0][2] * b[2][3] + a[0][3]; + + m[1][0] = a[1][0] * b[0][0] + a[1][1] * b[1][0] + a[1][2] * b[2][0]; + m[1][1] = a[1][0] * b[0][1] + a[1][1] * b[1][1] + a[1][2] * b[2][1]; + m[1][2] = a[1][0] * b[0][2] + a[1][1] * b[1][2] + a[1][2] * b[2][2]; + m[1][3] = a[1][0] * b[0][3] + a[1][1] * b[1][3] + a[1][2] * b[2][3] + a[1][3]; + + m[2][0] = a[2][0] * b[0][0] + a[2][1] * b[1][0] + a[2][2] * b[2][0]; + m[2][1] = a[2][0] * b[0][1] + a[2][1] * b[1][1] + a[2][2] * b[2][1]; + m[2][2] = a[2][0] * b[0][2] + a[2][1] * b[1][2] + a[2][2] * b[2][2]; + m[2][3] = a[2][0] * b[0][3] + a[2][1] * b[1][3] + a[2][2] * b[2][3] + a[2][3]; + + if (m == mTmp) { /* the scratch copy was used: publish it to the caller's matrix */ + C_MTXCopy(mTmp, ab); + } +} + +#ifdef GEKKO +asm void PSMTXConcat(const register Mtx mA, const register Mtx mB, register Mtx mAB) +{ + // clang-format off + nofralloc + +#define FP0 fp0 +#define FP1 fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 +#define FP14 fp14 +#define FP15 fp15 +#define FP31 fp31 + stwu r1, -64(r1); 
+ psq_l FP0, 0(mA), 0, 0; + stfd fp14, 8(r1); + psq_l FP6, 0(mB), 0, 0; + addis r6, 0, Unit01@ha; + psq_l FP7, 8(mB), 0, 0; + stfd fp15, 16(r1) + addi r6, r6, Unit01@l; + stfd fp31, 40(r1) + psq_l FP8, 16(mB), 0, 0 + ps_muls0 FP12, FP6, FP0 + psq_l FP2, 16(mA), 0, 0 + ps_muls0 FP13, FP7, FP0 + psq_l FP31, 0(r6), 0, 0 + ps_muls0 FP14, FP6, FP2 + psq_l FP9, 24(mB), 0, 0 + ps_muls0 FP15, FP7, FP2 + psq_l FP1, 8(mA), 0, 0 + ps_madds1 FP12, FP8, FP0, FP12 + psq_l FP3, 24(mA), 0, 0 + ps_madds1 FP14, FP8, FP2, FP14 + psq_l FP10, 32(mB), 0, 0 + ps_madds1 FP13, FP9, FP0, FP13 + psq_l FP11, 40(mB), 0, 0 + ps_madds1 FP15, FP9, FP2, FP15 + psq_l FP4, 32(mA), 0, 0 + psq_l FP5, 40(mA), 0, 0 + ps_madds0 FP12, FP10, FP1, FP12 + ps_madds0 FP13, FP11, FP1, FP13 + ps_madds0 FP14, FP10, FP3, FP14 + ps_madds0 FP15, FP11, FP3, FP15 + psq_st FP12, 0(mAB), 0, 0 + + ps_muls0 FP2, FP6, FP4 + ps_madds1 FP13, FP31, FP1, FP13 + ps_muls0 FP0, FP7, FP4 + psq_st FP14, 16(mAB), 0, 0 + ps_madds1 FP15, FP31, FP3, FP15 + + psq_st FP13, 8(mAB), 0, 0 + + ps_madds1 FP2, FP8, FP4, FP2 + ps_madds1 FP0, FP9, FP4, FP0 + ps_madds0 FP2, FP10, FP5, FP2 + lfd fp14, 8(r1) + psq_st FP15, 24(mAB), 0, 0 + ps_madds0 FP0, FP11, FP5, FP0 + psq_st FP2, 32(mAB), 0, 0 + ps_madds1 FP0, FP31, FP5, FP0 + lfd fp15, 16(r1) + psq_st FP0, 40(mAB), 0, 0 + + lfd fp31, 40(r1) + addi r1, r1, 64 + + blr + // clang-format on + +#undef FP0 +#undef FP1 +#undef FP2 +#undef FP3 +#undef FP4 +#undef FP5 +#undef FP6 +#undef FP7 +#undef FP8 +#undef FP9 +#undef FP10 +#undef FP11 +#undef FP12 +#undef FP13 +#undef FP14 +#undef FP15 +#undef FP31 +} +#endif + +void C_MTXConcatArray(const Mtx a, const Mtx *srcBase, Mtx *dstBase, u32 count) +{ + u32 i; + for (i = 0; i < count; i++) { + C_MTXConcat(a, *srcBase, *dstBase); + + srcBase++; + dstBase++; + } +} + +#ifdef GEKKO +#if (defined(__MWERKS__) && defined(_DEBUG)) +#pragma global_optimizer on +#pragma optimization_level 1 +#endif + +void PSMTXConcatArray(const register Mtx a, const register Mtx 
*srcBase, register Mtx *dstBase, register u32 count) +{ + register f32 va0, va1, va2, va3, va4, va5; + register f32 vb0, vb1, vb2, vb3, vb4, vb5; + register f32 vd0, vd1, vd2, vd3, vd4, vd5; + register f32 u01; + register f32 *u01Ptr = Unit01; + + // clang-format off + asm + { + psq_l va0, 0(a), 0, 0 + psq_l va1, 8(a), 0, 0 + psq_l va2, 16(a), 0, 0 + psq_l va3, 24(a), 0, 0 + subi count, count, 1 + psq_l va4, 32(a), 0, 0 + psq_l va5, 40(a), 0, 0 + mtctr count + psq_l u01, 0(u01Ptr), 0, 0 + + psq_l vb0, 0(srcBase), 0, 0 + psq_l vb2, 16(srcBase), 0, 0 + + ps_muls0 vd0, vb0, va0 + ps_muls0 vd2, vb0, va2 + ps_muls0 vd4, vb0, va4 + + psq_l vb4, 32(srcBase), 0, 0 + + ps_madds1 vd0, vb2, va0, vd0 + ps_madds1 vd2, vb2, va2, vd2 + ps_madds1 vd4, vb2, va4, vd4 + + psq_l vb1, 8(srcBase), 0, 0 + + ps_madds0 vd0, vb4, va1, vd0 + ps_madds0 vd2, vb4, va3, vd2 + ps_madds0 vd4, vb4, va5, vd4 + + psq_l vb3, 24(srcBase), 0, 0 + psq_st vd0, 0(dstBase), 0, 0 + + ps_muls0 vd1, vb1, va0 + ps_muls0 vd3, vb1, va2 + ps_muls0 vd5, vb1, va4 + + psq_l vb5, 40(srcBase), 0, 0 + psq_st vd2, 16(dstBase), 0, 0 + ps_madds1 vd1, vb3, va0, vd1 + ps_madds1 vd3, vb3, va2, vd3 + ps_madds1 vd5, vb3, va4, vd5 + +_loop: + addi srcBase, srcBase, sizeof(Mtx) + ps_madds0 vd1, vb5, va1, vd1 + ps_madds0 vd3, vb5, va3, vd3 + ps_madds0 vd5, vb5, va5, vd5 + psq_l vb0, 0(srcBase), 0, 0 + psq_st vd4, 32(dstBase), 0, 0 + ps_madd vd1, u01, va1, vd1 + ps_madd vd3, u01, va3, vd3 + ps_madd vd5, u01, va5, vd5 + psq_l vb2, 16(srcBase), 0, 0 + psq_st vd1, 8(dstBase), 0, 0 + ps_muls0 vd0, vb0, va0 + ps_muls0 vd2, vb0, va2 + ps_muls0 vd4, vb0, va4 + psq_l vb4, 32(srcBase), 0, 0 + psq_st vd3, 24(dstBase), 0, 0 + ps_madds1 vd0, vb2, va0, vd0 + ps_madds1 vd2, vb2, va2, vd2 + ps_madds1 vd4, vb2, va4, vd4 + psq_l vb1, 8(srcBase), 0, 0 + psq_st vd5, 40(dstBase), 0, 0 + addi dstBase, dstBase, sizeof(Mtx) + + ps_madds0 vd0, vb4, va1, vd0 + ps_madds0 vd2, vb4, va3, vd2 + ps_madds0 vd4, vb4, va5, vd4 + psq_l vb3, 24(srcBase), 0, 0 + 
psq_st vd0, 0(dstBase), 0, 0 + ps_muls0 vd1, vb1, va0 + ps_muls0 vd3, vb1, va2 + ps_muls0 vd5, vb1, va4 + psq_l vb5, 40(srcBase), 0, 0 + psq_st vd2, 16(dstBase), 0, 0 + ps_madds1 vd1, vb3, va0, vd1 + ps_madds1 vd3, vb3, va2, vd3 + ps_madds1 vd5, vb3, va4, vd5 + bdnz _loop + psq_st vd4, 32(dstBase), 0, 0 + ps_madds0 vd1, vb5, va1, vd1 + ps_madds0 vd3, vb5, va3, vd3 + ps_madds0 vd5, vb5, va5, vd5 + ps_madd vd1, u01, va1, vd1 + ps_madd vd3, u01, va3, vd3 + ps_madd vd5, u01, va5, vd5 + psq_st vd1, 8(dstBase), 0, 0 + psq_st vd3, 24(dstBase), 0, 0 + psq_st vd5, 40(dstBase), 0, 0 + } + // clang-format on +} + +#if (defined(__MWERKS__) && defined(_DEBUG)) +#pragma optimization_level 0 +#pragma global_optimizer reset +#endif + +#endif + +void C_MTXTranspose(const Mtx src, Mtx xPose) +{ + Mtx mTmp; + MtxPtr m; + + if (src == xPose) { + m = mTmp; + } + else { + m = xPose; + } + + m[0][0] = src[0][0]; + m[0][1] = src[1][0]; + m[0][2] = src[2][0]; + m[0][3] = 0.0f; + m[1][0] = src[0][1]; + m[1][1] = src[1][1]; + m[1][2] = src[2][1]; + m[1][3] = 0.0f; + m[2][0] = src[0][2]; + m[2][1] = src[1][2]; + m[2][2] = src[2][2]; + m[2][3] = 0.0f; + + if (m == mTmp) { + C_MTXCopy(mTmp, xPose); + } +} + +#ifdef GEKKO +void PSMTXTranspose(const register Mtx src, register Mtx xPose) +{ + register f32 c_zero = 0.0F; + register f32 row0a, row1a, row0b, row1b; + register f32 trns0, trns1, trns2; + // clang-format off + asm + { + psq_l row0a, 0(src), 0, 0 + stfs c_zero, 44(xPose) + psq_l row1a, 16(src), 0, 0 + ps_merge00 trns0, row0a, row1a + psq_l row0b, 8(src), 1, 0 + ps_merge11 trns1, row0a, row1a + psq_l row1b, 24(src), 1, 0 + psq_st trns0, 0(xPose), 0, 0 + psq_l row0a, 32(src), 0, 0 + ps_merge00 trns2, row0b, row1b + psq_st trns1, 16(xPose), 0, 0 + ps_merge00 trns0, row0a, c_zero + psq_st trns2, 32(xPose), 0, 0 + ps_merge10 trns1, row0a, c_zero + psq_st trns0, 8(xPose), 0, 0 + lfs row0b, 40(src) + psq_st trns1, 24(xPose), 0, 0 + stfs row0b, 40(xPose) + } + // clang-format on +} +#endif + 
+u32 C_MTXInverse(const Mtx src, Mtx inv) /* Inverts src into inv. Returns 1 on success, or 0 (leaving inv unwritten) when the determinant of the upper-left 3x3 is exactly 0.0f. src and inv may be the same matrix. */ +{ + Mtx mTmp; /* scratch result, used only when inv aliases src */ + MtxPtr m; /* where the inverse is actually built */ + f32 det; + + if (src == inv) { + m = mTmp; + } + else { + m = inv; + } + + det = src[0][0] * src[1][1] * src[2][2] + src[0][1] * src[1][2] * src[2][0] + src[0][2] * src[1][0] * src[2][1] + - src[2][0] * src[1][1] * src[0][2] - src[1][0] * src[0][1] * src[2][2] - src[0][0] * src[2][1] * src[1][2]; + + if (det == 0.0f) { /* singular: report failure before anything is stored */ + return 0; + } + + det = 1.0f / det; /* from here on det holds the reciprocal, so the entries below multiply instead of divide */ + + m[0][0] = (src[1][1] * src[2][2] - src[2][1] * src[1][2]) * det; + m[0][1] = -(src[0][1] * src[2][2] - src[2][1] * src[0][2]) * det; + m[0][2] = (src[0][1] * src[1][2] - src[1][1] * src[0][2]) * det; + + m[1][0] = -(src[1][0] * src[2][2] - src[2][0] * src[1][2]) * det; + m[1][1] = (src[0][0] * src[2][2] - src[2][0] * src[0][2]) * det; + m[1][2] = -(src[0][0] * src[1][2] - src[1][0] * src[0][2]) * det; + + m[2][0] = (src[1][0] * src[2][1] - src[2][0] * src[1][1]) * det; + m[2][1] = -(src[0][0] * src[2][1] - src[2][0] * src[0][1]) * det; + m[2][2] = (src[0][0] * src[1][1] - src[1][0] * src[0][1]) * det; + + m[0][3] = -m[0][0] * src[0][3] - m[0][1] * src[1][3] - m[0][2] * src[2][3]; /* column 3 = -(inverted 3x3 just stored in m) * (column 3 of src) */ + m[1][3] = -m[1][0] * src[0][3] - m[1][1] * src[1][3] - m[1][2] * src[2][3]; + m[2][3] = -m[2][0] * src[0][3] - m[2][1] * src[1][3] - m[2][2] * src[2][3]; + + if (m == mTmp) { /* the scratch copy was used: publish it to the caller's matrix */ + C_MTXCopy(mTmp, inv); + } + + return 1; +} + +#ifdef GEKKO +asm u32 PSMTXInverse(const register Mtx src, register Mtx inv) { + // clang-format off + nofralloc + + psq_l fp0, 0(src), 1, 0 + psq_l fp1, 4(src), 0, 0 + psq_l fp2, 16(src), 1, 0 + ps_merge10 fp6, fp1, fp0 + psq_l fp3, 20(src), 0, 0 + psq_l fp4, 32(src), 1, 0 + ps_merge10 fp7, fp3, fp2 + psq_l fp5, 36(src), 0, 0 + ps_mul fp11, fp3, fp6 + ps_mul fp13, fp5, fp7 + ps_merge10 fp8, fp5, fp4 + ps_msub fp11, fp1, fp7, fp11 + ps_mul fp12, fp1, fp8 + ps_msub fp13, fp3, fp8, fp13 + ps_mul fp10, fp3, fp4 + ps_msub fp12, fp5, fp6, fp12 + ps_mul fp9, fp0, fp5 + ps_mul fp8, fp1, fp2 + ps_sub fp6, fp6, fp6 + ps_msub fp10, fp2, fp5, fp10 + 
ps_mul fp7, fp0, fp13 + ps_msub fp9, fp1, fp4, fp9 + ps_madd fp7, fp2, fp12, fp7 + ps_msub fp8, fp0, fp3, fp8 + ps_madd fp7, fp4, fp11, fp7 + ps_cmpo0 cr0, fp7, fp6 + bne _regular + addi r3, 0, 0 + blr + +_regular: + fres fp0, fp7 + ps_add fp6, fp0, fp0 + ps_mul fp5, fp0, fp0 + ps_nmsub fp0, fp7, fp5, fp6 + lfs fp1, 12(src) + ps_muls0 fp13, fp13, fp0 + lfs fp2, 28(src) + ps_muls0 fp12, fp12, fp0 + lfs fp3, 44(src) + ps_muls0 fp11, fp11, fp0 + ps_merge00 fp5, fp13, fp12 + ps_muls0 fp10, fp10, fp0 + ps_merge11 fp4, fp13, fp12 + ps_muls0 fp9, fp9, fp0 + psq_st fp5, 0(inv), 0, 0 + ps_mul fp6, fp13, fp1 + psq_st fp4, 16(inv), 0, 0 + ps_muls0 fp8, fp8, fp0 + ps_madd fp6, fp12, fp2, fp6 + psq_st fp10, 32(inv), 1, 0 + ps_nmadd fp6, fp11, fp3, fp6 + psq_st fp9, 36(inv), 1, 0 + ps_mul fp7, fp10, fp1 + ps_merge00 fp5, fp11, fp6 + psq_st fp8, 40(inv), 1, 0 + ps_merge11 fp4, fp11, fp6 + psq_st fp5, 8(inv), 0, 0 + ps_madd fp7, fp9, fp2, fp7 + psq_st fp4, 24(inv), 0, 0 + ps_nmadd fp7, fp8, fp3, fp7 + addi r3, 0, 1 + psq_st fp7, 44(inv), 1, 0 + blr + // clang-format on +} +#endif + +u32 C_MTXInvXpose(const Mtx src, Mtx invX) +{ + Mtx mTmp; + MtxPtr m; + f32 det; + + if (src == invX) { + m = mTmp; + } + else { + m = invX; + } + + det = src[0][0] * src[1][1] * src[2][2] + src[0][1] * src[1][2] * src[2][0] + src[0][2] * src[1][0] * src[2][1] + - src[2][0] * src[1][1] * src[0][2] - src[1][0] * src[0][1] * src[2][2] - src[0][0] * src[2][1] * src[1][2]; + + if (det == 0.0f) { + return 0; + } + + det = 1.0f / det; + + m[0][0] = (src[1][1] * src[2][2] - src[2][1] * src[1][2]) * det; + m[0][1] = -(src[1][0] * src[2][2] - src[2][0] * src[1][2]) * det; + m[0][2] = (src[1][0] * src[2][1] - src[2][0] * src[1][1]) * det; + + m[1][0] = -(src[0][1] * src[2][2] - src[2][1] * src[0][2]) * det; + m[1][1] = (src[0][0] * src[2][2] - src[2][0] * src[0][2]) * det; + m[1][2] = -(src[0][0] * src[2][1] - src[2][0] * src[0][1]) * det; + + m[2][0] = (src[0][1] * src[1][2] - src[1][1] * src[0][2]) * det; + 
m[2][1] = -(src[0][0] * src[1][2] - src[1][0] * src[0][2]) * det; + m[2][2] = (src[0][0] * src[1][1] - src[1][0] * src[0][1]) * det; + + m[0][3] = 0.0F; + m[1][3] = 0.0F; + m[2][3] = 0.0F; + + if (m == mTmp) { + C_MTXCopy(mTmp, invX); + } + + return 1; +} + +#ifdef GEKKO +asm u32 PSMTXInvXpose(const register Mtx src, register Mtx invX) +{ + // clang-format off + nofralloc + + psq_l fp0, 0(src), 1, 0 + psq_l fp1, 4(src), 0, 0 + psq_l fp2, 16(src), 1, 0 + ps_merge10 fp6, fp1, fp0 + psq_l fp3, 20(src), 0, 0 + psq_l fp4, 32(src), 1, 0 + ps_merge10 fp7, fp3, fp2 + psq_l fp5, 36(src), 0, 0 + ps_mul fp11, fp3, fp6 + ps_merge10 fp8, fp5, fp4 + ps_mul fp13, fp5, fp7 + ps_msub fp11, fp1, fp7, fp11 + ps_mul fp12, fp1, fp8 + ps_msub fp13, fp3, fp8, fp13 + ps_msub fp12, fp5, fp6, fp12 + ps_mul fp10, fp3, fp4 + ps_mul fp9, fp0, fp5 + ps_mul fp8, fp1, fp2 + ps_msub fp10, fp2, fp5, fp10 + ps_msub fp9, fp1, fp4, fp9 + ps_msub fp8, fp0, fp3, fp8 + ps_mul fp7, fp0, fp13 + ps_sub fp1, fp1, fp1 + ps_madd fp7, fp2, fp12, fp7 + ps_madd fp7, fp4, fp11, fp7 + ps_cmpo0 cr0, fp7, fp1 + bne _regular + addi r3, 0, 0 + blr + +_regular: + fres fp0, fp7 + psq_st fp1, 12(invX), 1, 0 + ps_add fp6, fp0, fp0 + ps_mul fp5, fp0, fp0 + psq_st fp1, 28(invX), 1, 0 + ps_nmsub fp0, fp7, fp5, fp6 + psq_st fp1, 44(invX), 1, 0 + ps_muls0 fp13, fp13, fp0 + ps_muls0 fp12, fp12, fp0 + ps_muls0 fp11, fp11, fp0 + psq_st fp13, 0(invX), 0, 0 + psq_st fp12, 16(invX), 0, 0 + ps_muls0 fp10, fp10, fp0 + ps_muls0 fp9, fp9, fp0 + psq_st fp11, 32(invX), 0, 0 + psq_st fp10, 8(invX), 1, 0 + ps_muls0 fp8, fp8, fp0 + addi r3, 0, 1 + psq_st fp9, 24(invX), 1, 0 + psq_st fp8, 40(invX), 1, 0 + blr + // clang-format on +} +#endif + +void C_MTXRotRad(Mtx m, char axis, f32 rad) +{ + + f32 sinA, cosA; + sinA = sinf(rad); + cosA = cosf(rad); + C_MTXRotTrig(m, axis, sinA, cosA); +} + +#ifdef GEKKO +void PSMTXRotRad(Mtx m, char axis, f32 rad) +{ + // f32 sinA, cosA; + + // sinA = sinf(rad); + // cosA = cosf(rad); + + // PSMTXRotTrig(m, 
axis, sinA, cosA); +} +#endif + +void C_MTXRotTrig(Mtx m, char axis, f32 sinA, f32 cosA) +{ + switch (axis) { + + case 'x': + case 'X': + m[0][0] = 1.0f; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = cosA; + m[1][2] = -sinA; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = sinA; + m[2][2] = cosA; + m[2][3] = 0.0f; + break; + + case 'y': + case 'Y': + m[0][0] = cosA; + m[0][1] = 0.0f; + m[0][2] = sinA; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = 1.0f; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = -sinA; + m[2][1] = 0.0f; + m[2][2] = cosA; + m[2][3] = 0.0f; + break; + + case 'z': + case 'Z': + m[0][0] = cosA; + m[0][1] = -sinA; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = sinA; + m[1][1] = cosA; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 1.0f; + m[2][3] = 0.0f; + break; + + default: + break; + } +} + +#ifdef GEKKO +void PSMTXRotTrig(register Mtx m, register char axis, register f32 sinA, register f32 cosA) +{ +// register f32 fc0, fc1, nsinA; +// register f32 fw0, fw1, fw2, fw3; +// // clang-format off +// asm +// { +// frsp sinA, sinA +// frsp cosA, cosA +// } + +// fc0 = 0.0F; +// fc1 = 1.0F; +// asm +// { +// ori axis, axis, 0x20 +// ps_neg nsinA, sinA +// cmplwi axis, 'x' +// beq _case_x +// cmplwi axis, 'y' +// beq _case_y +// cmplwi axis, 'z' +// beq _case_z +// b _end + +// _case_x: +// psq_st fc1, 0(m), 1, 0 +// psq_st fc0, 4(m), 0, 0 +// ps_merge00 fw0, sinA, cosA +// psq_st fc0, 12(m), 0, 0 +// ps_merge00 fw1, cosA, nsinA +// psq_st fc0, 28(m), 0, 0 +// psq_st fc0, 44(m), 1, 0 +// psq_st fw0, 36(m), 0, 0 +// psq_st fw1, 20(m), 0, 0 +// b _end; + +// _case_y: +// ps_merge00 fw0, cosA, fc0 +// ps_merge00 fw1, fc0, fc1 +// psq_st fc0, 24(m), 0, 0 +// psq_st fw0, 0(m), 0, 0 +// ps_merge00 fw2, nsinA, fc0 +// ps_merge00 fw3, sinA, fc0 +// psq_st fw0, 40(m), 0, 0; +// psq_st fw1, 16(m), 0, 0; +// psq_st fw3, 8(m), 0, 0; +// psq_st fw2, 32(m), 0, 0; +// b _end; + +// _case_z: +// 
psq_st fc0, 8(m), 0, 0 +// ps_merge00 fw0, sinA, cosA +// ps_merge00 fw2, cosA, nsinA +// psq_st fc0, 24(m), 0, 0 +// psq_st fc0, 32(m), 0, 0 +// ps_merge00 fw1, fc1, fc0 +// psq_st fw0, 16(m), 0, 0 +// psq_st fw2, 0(m), 0, 0 +// psq_st fw1, 40(m), 0, 0 + +// _end: +// } +// // clang-format on +} + +#endif + +void C_MTXRotAxisRad(Mtx m, const Vec *axis, f32 rad) +{ + Vec vN; + f32 s, c; + f32 t; + f32 x, y, z; + f32 xSq, ySq, zSq; + + s = sinf(rad); + c = cosf(rad); + t = 1.0f - c; + + C_VECNormalize(axis, &vN); + + x = vN.x; + y = vN.y; + z = vN.z; + + xSq = x * x; + ySq = y * y; + zSq = z * z; + + m[0][0] = (t * xSq) + (c); + m[0][1] = (t * x * y) - (s * z); + m[0][2] = (t * x * z) + (s * y); + m[0][3] = 0.0f; + + m[1][0] = (t * x * y) + (s * z); + m[1][1] = (t * ySq) + (c); + m[1][2] = (t * y * z) - (s * x); + m[1][3] = 0.0f; + + m[2][0] = (t * x * z) - (s * y); + m[2][1] = (t * y * z) + (s * x); + m[2][2] = (t * zSq) + (c); + m[2][3] = 0.0f; +} + +#ifdef GEKKO +static void __PSMTXRotAxisRadInternal(register Mtx m, const register Vec *axis, register f32 sT, register f32 cT) +{ + register f32 tT, fc0; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp9 = 0.5F; + tmp8 = 3.0F; + // clang-format off + asm + { + frsp cT, cT + psq_l tmp0, 0(axis), 0, 0 + frsp sT, sT + lfs tmp1, 8(axis) + ps_mul tmp2, tmp0, tmp0 + fadds tmp7, tmp9, tmp9 + ps_madd tmp3, tmp1, tmp1, tmp2 + fsubs fc0, tmp9, tmp9 + ps_sum0 tmp4, tmp3, tmp1, tmp2 + fsubs tT, tmp7, cT + frsqrte tmp5, tmp4 + fmuls tmp2, tmp5, tmp5 + fmuls tmp3, tmp5, tmp9 + fnmsubs tmp2, tmp2, tmp4, tmp8 + fmuls tmp5, tmp2, tmp3 + ps_merge00 cT, cT, cT + ps_muls0 tmp0, tmp0, tmp5 + ps_muls0 tmp1, tmp1, tmp5 + ps_muls0 tmp4, tmp0, tT + ps_muls0 tmp9, tmp0, sT + ps_muls0 tmp5, tmp1, tT + ps_muls1 tmp3, tmp4, tmp0 + ps_muls0 tmp2, tmp4, tmp0 + ps_muls0 tmp4, tmp4, tmp1 + fnmsubs tmp6, tmp1, sT, tmp3 + fmadds tmp7, tmp1, sT, tmp3 + ps_neg tmp0, tmp9 + ps_sum0 tmp8, tmp4, fc0, tmp9 + 
ps_sum0 tmp2, tmp2, tmp6, cT + ps_sum1 tmp3, cT, tmp7, tmp3 + ps_sum0 tmp6, tmp0, fc0 ,tmp4 + psq_st tmp8, 8(m), 0, 0 + ps_sum0 tmp0, tmp4, tmp4, tmp0 + psq_st tmp2, 0(m), 0, 0 + ps_muls0 tmp5, tmp5, tmp1 + psq_st tmp3, 16(m), 0, 0 + ps_sum1 tmp4, tmp9, tmp0, tmp4 + psq_st tmp6, 24(m), 0, 0 + ps_sum0 tmp5, tmp5, fc0, cT + psq_st tmp4, 32(m), 0, 0 + psq_st tmp5, 40(m), 0, 0 + } + // clang-format on +} + +void PSMTXRotAxisRad(Mtx m, const Vec *axis, f32 rad) /* Paired-single counterpart of C_MTXRotAxisRad: takes the sine and cosine of rad and hands them, with axis, to __PSMTXRotAxisRadInternal, which stores the result into m. */ +{ + f32 sinT, cosT; /* sine and cosine of rad */ + + sinT = sinf(rad); + cosT = cosf(rad); + + __PSMTXRotAxisRadInternal(m, axis, sinT, cosT); /* previously commented out, which left m untouched and the helper unused */ +} + +#endif + +void C_MTXTrans(Mtx m, f32 xT, f32 yT, f32 zT) +{ + m[0][0] = 1.0f; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = xT; + m[1][0] = 0.0f; + m[1][1] = 1.0f; + m[1][2] = 0.0f; + m[1][3] = yT; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 1.0f; + m[2][3] = zT; +} + +#ifdef GEKKO +void PSMTXTrans(register Mtx m, register f32 xT, register f32 yT, register f32 zT) +{ + register f32 c0 = 0.0F; + register f32 c1 = 1.0F; + // clang-format off + asm + { + stfs xT, 12(m) + stfs yT, 28(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs c0, 16(m) + stfs c1, 20(m) + stfs c0, 24(m) + stfs c1, 40(m) + stfs zT, 44(m) + stfs c1, 0(m) + } + // clang-format on +} +#endif + +void C_MTXTransApply(const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT) +{ + if (src != dst) { + dst[0][0] = src[0][0]; + dst[0][1] = src[0][1]; + dst[0][2] = src[0][2]; + dst[1][0] = src[1][0]; + dst[1][1] = src[1][1]; + dst[1][2] = src[1][2]; + dst[2][0] = src[2][0]; + dst[2][1] = src[2][1]; + dst[2][2] = src[2][2]; + } + + dst[0][3] = src[0][3] + xT; + dst[1][3] = src[1][3] + yT; + dst[2][3] = src[2][3] + zT; +} + +#ifdef GEKKO +asm void PSMTXTransApply(const register Mtx src, register Mtx dst, register f32 xT, register f32 yT, register f32 zT) +{ + // clang-format off + nofralloc; + psq_l fp4, 0(src), 0, 0 + frsp xT, xT + psq_l fp5, 8(src), 0, 0 + frsp yT, yT + psq_l fp7, 24(src), 0, 0 + frsp zT, zT + psq_l fp8, 
40(src), 0, 0 + psq_st fp4, 0(dst), 0, 0 + ps_sum1 fp5, xT, fp5, fp5 + psq_l fp6, 16(src), 0, 0 + psq_st fp5, 8(dst), 0, 0 + ps_sum1 fp7, yT, fp7, fp7 + psq_l fp9, 32(src), 0, 0 + psq_st fp6, 16(dst), 0, 0 + ps_sum1 fp8, zT, fp8, fp8 + psq_st fp7, 24(dst), 0, 0 + psq_st fp9, 32(dst), 0, 0 + psq_st fp8, 40(dst), 0, 0 + blr + // clang-format on +} +#endif + +void C_MTXScale(Mtx m, f32 xS, f32 yS, f32 zS) +{ + m[0][0] = xS; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = yS; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = zS; + m[2][3] = 0.0f; +} + +#ifdef GEKKO +void PSMTXScale(register Mtx m, register f32 xS, register f32 yS, register f32 zS) +{ + register f32 c0 = 0.0F; + // clang-format off + asm + { + stfs xS, 0(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 12(m), 0, 0 + stfs yS, 20(m) + psq_st c0, 24(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs zS, 40(m) + stfs c0, 44(m) + } + // clang-format on +} +#endif + +void C_MTXScaleApply(const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS) +{ + dst[0][0] = src[0][0] * xS; + dst[0][1] = src[0][1] * xS; + dst[0][2] = src[0][2] * xS; + dst[0][3] = src[0][3] * xS; + + dst[1][0] = src[1][0] * yS; + dst[1][1] = src[1][1] * yS; + dst[1][2] = src[1][2] * yS; + dst[1][3] = src[1][3] * yS; + + dst[2][0] = src[2][0] * zS; + dst[2][1] = src[2][1] * zS; + dst[2][2] = src[2][2] * zS; + dst[2][3] = src[2][3] * zS; +} + +#ifdef GEKKO +asm void PSMTXScaleApply(const register Mtx src, register Mtx dst, register f32 xS, register f32 yS, register f32 zS) +{ + // clang-format off + nofralloc; + frsp xS, xS + psq_l fp4, 0(src), 0, 0 + frsp yS, yS + psq_l fp5, 8(src), 0, 0 + frsp zS, zS + ps_muls0 fp4, fp4, xS + psq_l fp6, 16(src), 0, 0 + ps_muls0 fp5, fp5, xS + psq_l fp7, 24(src), 0, 0 + ps_muls0 fp6, fp6, yS + psq_l fp8, 32(src), 0, 0 + psq_st fp4, 0(dst), 0, 0 + ps_muls0 fp7, fp7, yS + psq_l fp2, 40(src), 0, 0 + psq_st fp5, 8(dst), 0, 0 + ps_muls0 fp8, fp8, zS + psq_st fp6, 16(dst), 
0, 0 + ps_muls0 fp2, fp2, zS + psq_st fp7, 24(dst), 0, 0 + psq_st fp8, 32(dst), 0, 0 + psq_st fp2, 40(dst), 0, 0 + blr + // clang-format on +} +#endif + +void C_MTXQuat(Mtx m, const Quaternion *q) +{ + + f32 s, xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz; + s = 2.0f / ((q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w)); + + xs = q->x * s; + ys = q->y * s; + zs = q->z * s; + wx = q->w * xs; + wy = q->w * ys; + wz = q->w * zs; + xx = q->x * xs; + xy = q->x * ys; + xz = q->x * zs; + yy = q->y * ys; + yz = q->y * zs; + zz = q->z * zs; + + m[0][0] = 1.0f - (yy + zz); + m[0][1] = xy - wz; + m[0][2] = xz + wy; + m[0][3] = 0.0f; + + m[1][0] = xy + wz; + m[1][1] = 1.0f - (xx + zz); + m[1][2] = yz - wx; + m[1][3] = 0.0f; + + m[2][0] = xz - wy; + m[2][1] = yz + wx; + m[2][2] = 1.0f - (xx + yy); + m[2][3] = 0.0f; +} + +#ifdef GEKKO +void PSMTXQuat(register Mtx m, const register Quaternion *q) +{ + register f32 c_zero, c_one, c_two, scale; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + + c_one = 1.0F; + // clang-format off + asm + { + psq_l tmp0, 0(q), 0, 0 + psq_l tmp1, 8(q), 0, 0 + fsubs c_zero, c_one, c_one + fadds c_two, c_one, c_one + ps_mul tmp2, tmp0, tmp0 + ps_merge10 tmp5, tmp0, tmp0 + ps_madd tmp4, tmp1, tmp1, tmp2 + ps_mul tmp3, tmp1, tmp1 + ps_sum0 scale, tmp4, tmp4, tmp4 + ps_muls1 tmp7, tmp5, tmp1 + fres tmp9, scale + ps_sum1 tmp4, tmp3, tmp4, tmp2 + ps_nmsub scale, scale, tmp9, c_two + ps_muls1 tmp6, tmp1, tmp1 + ps_mul scale, tmp9, scale + ps_sum0 tmp2, tmp2, tmp2, tmp2 + fmuls scale, scale, c_two + ps_madd tmp8, tmp0, tmp5, tmp6 + ps_msub tmp6, tmp0, tmp5, tmp6 + psq_st c_zero, 12(m), 1, 0 + ps_nmsub tmp2, tmp2, scale, c_one + ps_nmsub tmp4, tmp4, scale, c_one + psq_st c_zero, 44(m), 1, 0 + ps_mul tmp8, tmp8, scale + ps_mul tmp6, tmp6, scale + psq_st tmp2, 40(m), 1, 0 + ps_madds0 tmp5, tmp0, tmp1, tmp7 + ps_merge00 tmp1, tmp8, tmp4 + ps_nmsub tmp7, tmp7, c_two, tmp5 + ps_merge10 tmp0, tmp4, tmp6 + 
psq_st tmp1, 16(m), 0, 0 + ps_mul tmp5, tmp5, scale + ps_mul tmp7, tmp7, scale + psq_st tmp0, 0(m), 0, 0 + psq_st tmp5, 8(m), 1, 0 + ps_merge10 tmp3, tmp7, c_zero + ps_merge01 tmp9, tmp7, tmp5 + psq_st tmp3, 24(m), 0, 0 + psq_st tmp9, 32(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXReflect(Mtx m, const Vec *p, const Vec *n) +{ + f32 vxy, vxz, vyz, pdotn; + + vxy = -2.0f * n->x * n->y; + vxz = -2.0f * n->x * n->z; + vyz = -2.0f * n->y * n->z; + pdotn = 2.0f * C_VECDotProduct(p, n); + + m[0][0] = 1.0f - 2.0f * n->x * n->x; + m[0][1] = vxy; + m[0][2] = vxz; + m[0][3] = pdotn * n->x; + + m[1][0] = vxy; + m[1][1] = 1.0f - 2.0f * n->y * n->y; + m[1][2] = vyz; + m[1][3] = pdotn * n->y; + + m[2][0] = vxz; + m[2][1] = vyz; + m[2][2] = 1.0f - 2.0f * n->z * n->z; + m[2][3] = pdotn * n->z; +} + +#ifdef GEKKO +void PSMTXReflect(register Mtx m, const register Vec *p, const register Vec *n) +{ + register f32 c_one = 1.0F; + register f32 vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn; + register f32 tmp0, tmp1, tmp2, tmp3; + register f32 tmp4, tmp5, tmp6, tmp7; + // clang-format off + asm + { + psq_l vn_z1, 8(n), 1, 0 + psq_l vn_xy, 0(n), 0, 0 + psq_l tmp0, 0(p), 0, 0 + ps_nmadd n2vn_z1, vn_z1, c_one, vn_z1 + psq_l tmp1, 8(p), 1, 0 + ps_nmadd n2vn_xy, vn_xy, c_one, vn_xy + ps_muls0 tmp4, vn_xy, n2vn_z1 + ps_mul pdotn, n2vn_xy, tmp0 + ps_muls0 tmp2, vn_xy, n2vn_xy + ps_sum0 pdotn, pdotn, pdotn, pdotn + ps_muls1 tmp3, vn_xy, n2vn_xy + psq_st tmp4, 32(m), 0, 0 + ps_sum0 tmp2, tmp2, tmp2, c_one + ps_nmadd pdotn, n2vn_z1, tmp1, pdotn + ps_sum1 tmp3, c_one, tmp3, tmp3 + psq_st tmp2, 0(m), 0, 0 + ps_muls0 tmp5, vn_xy, pdotn + ps_merge00 tmp6, n2vn_z1, pdotn + psq_st tmp3, 16(m), 0, 0 + ps_merge00 tmp7, tmp4, tmp5 + ps_muls0 tmp6, tmp6, vn_z1 + ps_merge11 tmp5, tmp4, tmp5 + psq_st tmp7, 8(m), 0, 0 + ps_sum0 tmp6, tmp6, tmp6, c_one + psq_st tmp5, 24(m), 0, 0 + psq_st tmp6, 40(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXLookAt(Mtx m, const Point3d *camPos, const Vec 
*camUp, const Point3d *target) +{ + // Vec vLook, vRight, vUp; + + // vLook.x = camPos->x - target->x; + // vLook.y = camPos->y - target->y; + // vLook.z = camPos->z - target->z; + // VECNormalize(&vLook, &vLook); + // VECCrossProduct(camUp, &vLook, &vRight); + // VECNormalize(&vRight, &vRight); + // VECCrossProduct(&vLook, &vRight, &vUp); + + // m[0][0] = vRight.x; + // m[0][1] = vRight.y; + // m[0][2] = vRight.z; + // m[0][3] = -(camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z); + + // m[1][0] = vUp.x; + // m[1][1] = vUp.y; + // m[1][2] = vUp.z; + // m[1][3] = -(camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z); + + // m[2][0] = vLook.x; + // m[2][1] = vLook.y; + // m[2][2] = vLook.z; + // m[2][3] = -(camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z); +} + +void C_MTXLightFrustum(Mtx m, float t, float b, float l, float r, float n, float scaleS, float scaleT, float transS, float transT) +{ + f32 tmp; + + tmp = 1.0f / (r - l); + m[0][0] = ((2 * n) * tmp) * scaleS; + m[0][1] = 0.0f; + m[0][2] = (((r + l) * tmp) * scaleS) - transS; + m[0][3] = 0.0f; + + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = ((2 * n) * tmp) * scaleT; + m[1][2] = (((t + b) * tmp) * scaleT) - transT; + m[1][3] = 0.0f; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = -1.0f; + m[2][3] = 0.0f; +} + +void C_MTXLightPerspective(Mtx m, f32 fovY, f32 aspect, float scaleS, float scaleT, float transS, float transT) +{ + // f32 angle; + // f32 cot; + + // angle = fovY * 0.5f; + // angle = MTXDegToRad(angle); + + // cot = 1.0f / tanf(angle); + + // m[0][0] = (cot / aspect) * scaleS; + // m[0][1] = 0.0f; + // m[0][2] = -transS; + // m[0][3] = 0.0f; + + // m[1][0] = 0.0f; + // m[1][1] = cot * scaleT; + // m[1][2] = -transT; + // m[1][3] = 0.0f; + + // m[2][0] = 0.0f; + // m[2][1] = 0.0f; + // m[2][2] = -1.0f; + // m[2][3] = 0.0f; +} + +void C_MTXLightOrtho(Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS, float scaleT, float transS, float transT) +{ + f32 tmp; 
+ tmp = 1.0f / (r - l); + m[0][0] = (2.0f * tmp * scaleS); + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = ((-(r + l) * tmp) * scaleS) + transS; + + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = (2.0f * tmp) * scaleT; + m[1][2] = 0.0f; + m[1][3] = ((-(t + b) * tmp) * scaleT) + transT; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 0.0f; + m[2][3] = 1.0f; +} diff --git a/src/dolphin/mtx/mtx44.c b/src/dolphin/mtx/mtx44.c new file mode 100644 index 00000000..a6c2faa9 --- /dev/null +++ b/src/dolphin/mtx/mtx44.c @@ -0,0 +1,99 @@ +#include "dolphin/mtx.h" +#include "math.h" + +void C_MTXFrustum(Mtx44 m, f32 arg1, f32 arg2, f32 arg3, f32 arg4, f32 arg5, f32 arg6) +{ + f32 tmp = 1.0f / (arg4 - arg3); + m[0][0] = (2 * arg5) * tmp; + m[0][1] = 0.0f; + m[0][2] = (arg4 + arg3) * tmp; + m[0][3] = 0.0f; + tmp = 1.0f / (arg1 - arg2); + m[1][0] = 0.0f; + m[1][1] = (2 * arg5) * tmp; + m[1][2] = (arg1 + arg2) * tmp; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (arg6 - arg5); + m[2][2] = -(arg5)*tmp; + m[2][3] = -(arg6 * arg5) * tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = -1.0f; + m[3][3] = 0.0f; +} + +// Functions match but has issues with float constants +void C_MTXPerspective(Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f) +{ + f32 angle = fovY * 0.5f; + f32 cot; + f32 tmp; + angle = MTXDegToRad(angle); + cot = 1.0f / tanf(angle); + m[0][0] = cot / aspect; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = cot; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (f - n); + m[2][2] = -(n)*tmp; + m[2][3] = -(f * n) * tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = -1.0f; + m[3][3] = 0.0f; +} + +void C_MTXOrtho(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f) +{ + f32 tmp = 1.0f / (r - l); + m[0][0] = 2.0f * tmp; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = -(r + l) * tmp; + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = 2.0f * tmp; + m[1][2] = 0.0f; + m[1][3] = 
-(t + b) * tmp; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (f - n); + m[2][2] = -(1.0f) * tmp; + m[2][3] = -(f)*tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = 0.0f; + m[3][3] = 1.0f; +} + +asm void PSMTX44Copy(register Mtx44 src, register Mtx44 dest) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l fp1, 0(src), 0, 0; + psq_st fp1, 0(dest), 0, 0; + psq_l fp1, 8(src), 0, 0; + psq_st fp1, 8(dest), 0, 0; + psq_l fp1, 0x10(src), 0, 0; + psq_st fp1, 0x10(dest), 0, 0; + psq_l fp1, 0x18(src), 0, 0; + psq_st fp1, 0x18(dest), 0, 0; + psq_l fp1, 0x20(src), 0, 0; + psq_st fp1, 0x20(dest), 0, 0; + psq_l fp1, 0x28(src), 0, 0; + psq_st fp1, 0x28(dest), 0, 0; + psq_l fp1, 0x30(src), 0, 0; + psq_st fp1, 0x30(dest), 0, 0; + psq_l fp1, 0x38(src), 0, 0; + psq_st fp1, 0x38(dest), 0, 0; + blr; +#endif // clang-format on +} diff --git a/src/dolphin/mtx/mtxvec.c b/src/dolphin/mtx/mtxvec.c new file mode 100644 index 00000000..d4e70ab0 --- /dev/null +++ b/src/dolphin/mtx/mtxvec.c @@ -0,0 +1,146 @@ +#include "dolphin/mtx.h" + +asm void PSMTXMultVec(const register Mtx m, const register Vec* in, register Vec* out) { +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l fp0, 0(in), 0, 0; + psq_l fp2, 0(m), 0, 0; + psq_l fp1, 8(in), 1, 0; + ps_mul fp4, fp2, fp0; + psq_l fp3, 8(m), 0, 0; + ps_madd fp5, fp3, fp1, fp4; + psq_l fp8, 16(m), 0, 0; + ps_sum0 fp6, fp5, fp6, fp5; + psq_l fp9, 24(m), 0, 0; + ps_mul fp10, fp8, fp0; + psq_st fp6, 0(out), 1, 0; + ps_madd fp11, fp9, fp1, fp10; + psq_l fp2, 32(m), 0, 0; + ps_sum0 fp12, fp11, fp12, fp11; + psq_l fp3, 40(m), 0, 0; + ps_mul fp4, fp2, fp0; + psq_st fp12, 4(out), 1, 0; + ps_madd fp5, fp3, fp1, fp4; + ps_sum0 fp6, fp5, fp6, fp5; + psq_st fp6, 8(out), 1, 0; + blr +#endif // clang-format on +} + +asm void PSMTXMultVecArray(register const Mtx m, register const Vec* srcBase, register Vec* dstBase, + register u32 count) { +#ifdef __MWERKS__ // clang-format off + nofralloc + + psq_l f13, 0(m), 0, 0 + psq_l f12, 16(m), 0, 
0 + addi count, count, -1 + psq_l f11, 8(m), 0, 0 + ps_merge00 f0, f13, f12 + addi dstBase, dstBase, -4 + psq_l f10, 24(m), 0, 0 + ps_merge11 f1, f13, f12 + mtctr count + psq_l f4, 32(m), 0, 0 + ps_merge00 f2, f11, f10 + psq_l f5, 40(m), 0, 0 + ps_merge11 f3, f11, f10 + psq_l f6, 0(srcBase), 0, 0 + psq_lu f7, 8(srcBase), 1, 0 + ps_madds0 f8, f0, f6, f3 + ps_mul f9, f4, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f5, f7, f9 + +lbl_80346E0C: + psq_lu f6, 4(srcBase), 0, 0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7, 8(srcBase), 1, 0 + ps_sum0 f13, f10, f9, f10 + ps_madds0 f8, f0, f6, f3 + ps_mul f9, f4, f6 + psq_stu f12, 4(dstBase), 0, 0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13, 8(dstBase), 1, 0 + ps_madd f10, f5, f7, f9 + bdnz lbl_80346E0C + + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f10 + psq_stu f12, 4(dstBase), 0, 0 + psq_stu f13, 8(dstBase), 1, 0 + blr +#endif // clang-format on +} + +asm void PSMTXMultVecSR(const register Mtx mtx, const register Vec* in, register Vec* out) { +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l fp0, 0(mtx), 0, 0; + psq_l fp6, 0(in), 0, 0; + psq_l fp2, 0x10(mtx), 0, 0; + ps_mul fp8, fp0, fp6; + psq_l fp4, 0x20(mtx), 0, 0; + ps_mul fp10, fp2, fp6; + psq_l fp7, 8(in), 1, 0; + ps_mul fp12, fp4, fp6; + psq_l fp3, 0x18(mtx), 0, 0; + ps_sum0 fp8, fp8, fp8, fp8; + psq_l fp5, 0x28(mtx), 0, 0; + ps_sum0 fp10, fp10, fp10, fp10; + psq_l fp1, 8(mtx), 0, 0; + ps_sum0 fp12, fp12, fp12, fp12; + ps_madd fp9, fp1, fp7, fp8; + psq_st fp9, 0(out), 1, 0; + ps_madd fp11, fp3, fp7, fp10; + psq_st fp11, 4(out), 1, 0; + ps_madd fp13, fp5, fp7, fp12; + psq_st fp13, 8(out), 1, 0; + blr +#endif // clang-format on +} + +asm void PSMTXMultVecArraySR(register const Mtx m, register const Vec* srcBase, + register Vec* dstBase, register u32 count) { +#ifdef __MWERKS__ // clang-format off + nofralloc + + psq_l f13, 0(m), 0, 0 + psq_l f12, 16(m), 0, 0 + addi count, count, -1 + psq_l f11, 8(m), 1, 0 + ps_merge00 f0, f13, f12 + addi dstBase, dstBase, -4 
+ psq_l f10, 24(m), 1, 0 + ps_merge11 f1, f13, f12 + mtctr count + psq_l f3, 32(m), 0, 0 + ps_merge00 f2, f11, f10 + psq_l f4, 40(m), 1, 0 + psq_l f6, 0(srcBase), 0, 0 + psq_lu f7, 8(srcBase), 1, 0 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f4, f7, f9 + +lbl_80346EE8: + psq_lu f6, 4(srcBase), 0, 0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7, 8(srcBase), 1, 0 + ps_sum0 f13, f10, f9, f9 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + psq_stu f12, 4(dstBase), 0, 0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13, 8(dstBase), 1, 0 + ps_madd f10, f4, f7, f9 + bdnz lbl_80346EE8 + + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f9 + psq_stu f12, 4(dstBase), 0, 0 + psq_stu f13, 8(dstBase), 1, 0 + blr +#endif // clang-format on +} diff --git a/src/dolphin/mtx/psmtx.c b/src/dolphin/mtx/psmtx.c new file mode 100644 index 00000000..0a6c5276 --- /dev/null +++ b/src/dolphin/mtx/psmtx.c @@ -0,0 +1,355 @@ +#include +#include + +#define qr0 0 +#define qr1 1 +#define qr6 6 + +asm void PSMTXReorder(const register Mtx src, register ROMtx dest) +{ + /* clang-format off */ + psq_l f0, 0(src), 0, qr0 + psq_l f2, 16(src), 0, qr0 + psq_l f4, 32(src), 0, qr0 + psq_l f1, 8(src), 0, qr0 + ps_merge00 f6, f0, f2 + psq_l f3, 24(src), 0, qr0 + ps_merge01 f12, f4, f0 + psq_l f5, 40(src), 0, qr0 + ps_merge11 f7, f2, f4 + psq_st f6, 0(dest), 0, qr0 + ps_merge00 f8, f1, f3 + psq_st f12, 8(dest), 0, qr0 + ps_merge01 f9, f5, f1 + psq_st f7, 16(dest), 0, qr0 + ps_merge11 f10, f3, f5 + psq_st f8, 24(dest), 0, qr0 + psq_st f9, 32(dest), 0, qr0 + psq_st f10, 40(dest), 0, qr0 + /* clang-format on */ +} + +asm void PSMTXROMultVecArray(const register ROMtx m, const register Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -64(r1) + stfd f14, 8(r1) + subi r7, count, 1 + stfd f15, 16(r1) + srwi r7, r7, 1 + stfd f16, 24(r1) + stfd f17, 32(r1) + stfd f18, 40(r1) + mtctr r7 + psq_l f0, 0(m), 0, qr0 + subi srcBase, srcBase, 8 + 
psq_l f1, 8(m), 1, qr0 + subi dstBase, dstBase, 4 + psq_l f6, 36(m), 0, qr0 + psq_lu f8, 8(srcBase), 0, qr0 + psq_l f7, 44(m), 1, qr0 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds0 f11, f0, f8, f6 + psq_l f2, 12(m), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_l f3, 20(m), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_lu f10, 8(srcBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_l f5, 32(m), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_l f4, 24(m), 0, qr0 + ps_madds0 f13, f2, f10, f13 + psq_lu f8, 8(srcBase), 0, qr0 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 8(srcBase), 0, qr0 +loop: + ps_madds0 f11, f0, f8, f6 + psq_stu f15, 4(dstBase), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_stu f16, 8(dstBase), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_stu f17, 4(dstBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_stu f18, 8(dstBase), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_lu f8, 8(srcBase), 0, qr0 + ps_madds0 f13, f2, f10, f13 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 8(srcBase), 0, qr0 + bdnz loop + psq_stu f15, 4(dstBase), 0, qr0 + clrlwi. 
r7, count, 31 + psq_stu f16, 8(dstBase), 1, qr0 + bne exit + psq_stu f17, 4(dstBase), 0, qr0 + psq_stu f18, 8(dstBase), 1, qr0 +exit: + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + addi r1, r1, 64 + blr + /* clang-format on */ +} + +asm void PSMTXROSkin2VecArray(const register ROMtx m0, const register ROMtx m1, const register f32 *wtBase, const register Vec *srcBase, + register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -160(r1) + stfd f14, 8(r1) + stfd f15, 16(r1) + stfd f16, 24(r1) + stfd f17, 32(r1) + stfd f18, 40(r1) + stfd f19, 48(r1) + stfd f20, 56(r1) + stfd f21, 64(r1) + stfd f22, 72(r1) + stfd f23, 80(r1) + stfd f24, 88(r1) + stfd f25, 96(r1) + stfd f26, 104(r1) + stfd f27, 112(r1) + stfd f28, 120(r1) + stfd f29, 128(r1) + stfd f30, 136(r1) + subi r9, r8, 1 + mtctr r9 + subi srcBase, srcBase, 4 + subi dstBase, dstBase, 4 + subi wtBase, wtBase, 4 + psq_l f14, 0(m0), 0, qr0 + psq_l f22, 0(m1), 0, qr0 + psq_l f15, 8(m0), 1, qr0 + psq_l f23, 8(m1), 1, qr0 + psq_l f16, 12(m0), 0, qr0 + psq_l f24, 12(m1), 0, qr0 + ps_sub f22, f22, f14 + psq_l f17, 20(m0), 1, qr0 + psq_l f25, 20(m1), 1, qr0 + ps_sub f23, f23, f15 + psq_l f18, 24(m0), 0, qr0 + psq_l f26, 24(m1), 0, qr0 + ps_sub f24, f24, f16 + psq_l f19, 32(m0), 1, qr0 + psq_l f27, 32(m1), 1, qr0 + ps_sub f25, f25, f17 + psq_l f20, 36(m0), 0, qr0 + psq_l f28, 36(m1), 0, qr0 + ps_sub f26, f26, f18 + psq_l f21, 44(m0), 1, qr0 + psq_l f29, 44(m1), 1, qr0 + ps_sub f27, f27, f19 + ps_sub f28, f28, f20 + ps_sub f29, f29, f21 + psq_lu f30, 4(wtBase), 1, qr0 + psq_lu f8, 4(srcBase), 0, qr0 + psq_lu f9, 8(srcBase), 1, qr0 + ps_madds0 f0, f22, f30, f14 + ps_madds0 f1, f23, f30, f15 + ps_madds0 f2, f24, f30, f16 + ps_madds0 f3, f25, f30, f17 + ps_madds0 f4, f26, f30, f18 + ps_madds0 f5, f27, f30, f19 + ps_madds0 f6, f28, f30, f20 + ps_madds0 f7, f29, f30, f21 + ps_madds0 f12, f0, f8, f6 + ps_madds0 f13, f1, f8, f7 + psq_lu f30, 4(wtBase), 
1, qr0 +loop: + ps_madds1 f12, f2, f8, f12 + ps_madds1 f13, f3, f8, f13 + psq_lu f8, 4(srcBase), 0, qr0 + ps_madds0 f10, f4, f9, f12 + ps_madds0 f11, f5, f9, f13 + psq_lu f9, 8(srcBase), 1, qr0 + ps_madds0 f0, f22, f30, f14 + ps_madds0 f1, f23, f30, f15 + ps_madds0 f2, f24, f30, f16 + ps_madds0 f3, f25, f30, f17 + ps_madds0 f4, f26, f30, f18 + ps_madds0 f5, f27, f30, f19 + ps_madds0 f6, f28, f30, f20 + ps_madds0 f7, f29, f30, f21 + psq_stu f10, 4(dstBase), 0, qr0 + ps_madds0 f12, f0, f8, f6 + ps_madds0 f13, f1, f8, f7 + psq_stu f11, 8(dstBase), 1, qr0 + psq_lu f30, 4(wtBase), 1, qr0 + bdnz loop + ps_madds1 f12, f2, f8, f12 + ps_madds1 f13, f3, f8, f13 + ps_madds0 f10, f4, f9, f12 + psq_stu f10, 4(dstBase), 0, qr0 + ps_madds0 f11, f5, f9, f13 + psq_stu f11, 8(dstBase), 1, qr0 + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + lfd f19, 48(r1) + lfd f20, 56(r1) + lfd f21, 64(r1) + lfd f22, 72(r1) + lfd f23, 80(r1) + lfd f24, 88(r1) + lfd f25, 96(r1) + lfd f26, 104(r1) + lfd f27, 112(r1) + lfd f28, 120(r1) + lfd f29, 128(r1) + lfd f30, 136(r1) + addi r1, r1, 160 + blr + /* clang-format on */ +} + +asm void PSMTXROMultS16VecArray(const register ROMtx m, const register S16Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -64(r1) + stfd f14, 8(r1) + subi r7, count, 1 + stfd f15, 16(r1) + srwi r7, r7, 1 + stfd f16, 24(r1) + lis r8, 7 + stfd f17, 32(r1) + mtspr GQR6, r8 + stfd f18, 40(r1) + mtctr r7 + psq_l f0, 0(m), 0, qr0 + subi srcBase, srcBase, 4 + psq_l f1, 8(m), 1, qr0 + subi dstBase, dstBase, 4 + psq_l f6, 36(m), 0, qr0 + psq_lu f8, 4(srcBase), 0, qr6 + psq_l f7, 44(m), 1, qr0 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds0 f11, f0, f8, f6 + psq_l f2, 12(m), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_l f3, 20(m), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_lu f10, 4(srcBase), 0, qr6 + ps_madds1 f14, f1, f9, f7 + psq_l f5, 32(m), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 
f12, f3, f8, f12 + psq_l f4, 24(m), 0, qr0 + ps_madds0 f13, f2, f10, f13 + psq_lu f8, 4(srcBase), 0, qr6 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 4(srcBase), 0, qr6 +loop: + ps_madds0 f11, f0, f8, f6 + psq_stu f15, 4(dstBase), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_stu f16, 8(dstBase), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_stu f17, 4(dstBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_stu f18, 8(dstBase), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_lu f8, 4(srcBase), 0, qr6 + ps_madds0 f13, f2, f10, f13 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 4(srcBase), 0, qr6 + bdnz loop + psq_stu f15, 4(dstBase), 0, qr0 + clrlwi. r7, count, 31 + psq_stu f16, 8(dstBase), 1, qr0 + bne exit + psq_stu f17, 4(dstBase), 0, qr0 + psq_stu f18, 8(dstBase), 1, qr0 +exit: + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + addi r1, r1, 64 + blr + /* clang-format on */ +} + +asm void PSMTXMultS16VecArray(const register Mtx44 m, const register S16Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + psq_l f0, 0(m), 0, qr0 + lis r7, 7 + mtspr GQR6, r7 + psq_l f6, 0(srcBase), 0, qr6 + subi count, count, 1 + psq_l f7, 4(srcBase), 1, qr6 + mtctr count + psq_l f1, 8(m), 0, qr0 + addi srcBase, srcBase, 4 + psq_l f2, 16(m), 0, qr0 + subi dstBase, dstBase, 4 + psq_l f3, 24(m), 0, qr0 + ps_mul f8, f0, f6 + psq_l f4, 32(m), 0, qr0 + ps_mul f10, f2, f6 + psq_l f5, 40(m), 0, qr0 + ps_mul f12, f4, f6 + psq_lu f6, 2(srcBase), 0, qr1 + ps_madd f8, f1, f7, f8 + ps_madd f10, f3, f7, f10 + ps_madd f12, f5, f7, f12 + psq_lu f7, 4(srcBase), 1, qr6 + ps_sum0 f9, f8, f8, f8 +loop: + ps_sum0 f11, f10, f10, f10 + 
ps_mul f8, f0, f6 + ps_sum0 f13, f12, f12, f12 + ps_mul f10, f2, f6 + psq_stu f9, 4(dstBase), 1, qr0 + ps_mul f12, f4, f6 + psq_stu f11, 4(dstBase), 1, qr0 + ps_madd f8, f1, f7, f8 + psq_stu f13, 4(dstBase), 1, qr0 + ps_madd f10, f3, f7, f10 + psq_lu f6, 2(srcBase), 0, qr6 + ps_madd f12, f5, f7, f12 + psq_lu f7, 4(srcBase), 1, qr6 + ps_sum0 f9, f8, f8, f8 + bdnz loop + ps_sum0 f11, f10, f10, f10 + ps_sum0 f13, f12, f12, f12 + psq_stu f9, 4(dstBase), 1, qr0 + psq_stu f11, 4(dstBase), 1, qr0 + psq_stu f13, 4(dstBase), 1, qr0 + /* clang-format on */ +} diff --git a/src/dolphin/mtx/quat.c b/src/dolphin/mtx/quat.c new file mode 100644 index 00000000..a874e14c --- /dev/null +++ b/src/dolphin/mtx/quat.c @@ -0,0 +1,80 @@ +#include "dolphin/mtx.h" +#include "math.h" + +void PSQUATMultiply(register const Quaternion *a, register const Quaternion *b, register Quaternion *ab) +{ + asm { + psq_l f0, 0(a), 0, 0 + psq_l f1, 8(a), 0, 0 + psq_l f2, 0(b), 0, 0 + ps_neg f5, f0 + psq_l f3, 8(b), 0, 0 + ps_neg f6, f1 + ps_merge01 f4, f5, f0 + ps_muls0 f7, f1, f2 + ps_muls0 f5, f5, f2 + ps_merge01 f1, f6, f1 + ps_muls1 f8, f4, f2 + ps_madds0 f7, f4, f3, f7 + ps_muls1 f2, f1, f2 + ps_madds0 f5, f1, f3, f5 + ps_madds1 f8, f6, f3, f8 + ps_merge10 f7, f7, f7 + ps_madds1 f2, f0, f3, f2 + ps_merge10 f5, f5, f5 + ps_add f7, f7, f2 + psq_st f7, 0(ab), 0, 0 + ps_sub f5, f5, f8 + psq_st f5, 8(ab), 0, 0 + } +} + +void C_QUATRotAxisRad(Quaternion *q, const Vec *axis, f32 rad) +{ + f32 tmp, tmp2, tmp3; + Vec dst; + + tmp = rad; + PSVECNormalize(axis, &dst); + + tmp2 = tmp * 0.5f; + tmp3 = sinf(tmp * 0.5f); + tmp = tmp3; + tmp3 = cosf(tmp2); + + q->x = tmp * dst.x; + q->y = tmp * dst.y; + q->z = tmp * dst.z; + q->w = tmp3; +} + +void C_QUATSlerp(const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t) +{ + f32 ratioA, ratioB; + + f32 value = 1.0f; + f32 cosHalfTheta = p->x * q->x + p->y * q->y + p->z * q->z + p->w * q->w; + + if (cosHalfTheta < 0.0f) { + cosHalfTheta = -cosHalfTheta; + value = 
-value; + } + + if (cosHalfTheta <= 0.9999899864196777f) { + f32 halfTheta = acosf(cosHalfTheta); + f32 sinHalfTheta = sinf(halfTheta); + + ratioA = sinf((1.0f - t) * halfTheta) / sinHalfTheta; + ratioB = sinf(t * halfTheta) / sinHalfTheta; + value *= ratioB; + } + else { + ratioA = 1.0f - t; + value *= t; + } + + r->x = (ratioA * p->x) + (value * q->x); + r->y = (ratioA * p->y) + (value * q->y); + r->z = (ratioA * p->z) + (value * q->z); + r->w = (ratioA * p->w) + (value * q->w); +} diff --git a/src/dolphin/mtx/vec.c b/src/dolphin/mtx/vec.c new file mode 100644 index 00000000..985a6196 --- /dev/null +++ b/src/dolphin/mtx/vec.c @@ -0,0 +1,287 @@ +#include "dolphin/mtx.h" +#include "math.h" + +#define R_RET fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 + +asm void PSVECAdd(const register Vec *vec1, const register Vec *vec2, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_add FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_add FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +#endif // clang-format on +} + +asm void PSVECSubtract(const register Vec *vec1, const register Vec *vec2, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_sub FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_sub FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +#endif // clang-format on +} + +asm void PSVECScale(register const Vec *src, register Vec *dst, register f32 scale) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 0(src), 0, 0 + psq_l f2, 8(src), 1, 0 + ps_muls0 f0, f0, f1 + psq_st f0, 0(dst), 0, 0 + ps_muls0 f0, f2, f1 + 
psq_st f0, 8(dst), 1, 0 + blr +#endif // clang-format on +} + +void C_VECScale(const Vec *src, Vec *dst, f32 scale) +{ + f32 s; + + s = 1.0f / sqrtf(src->z * src->z + src->x * src->x + src->y * src->y); + dst->x = src->x * s; + dst->y = src->y * s; + dst->z = src->z * s; +} + +void PSVECNormalize(const register Vec *vec1, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + register f32 half = 0.5f; + register f32 three = 3.0f; + register f32 xx_zz, xx_yy; + register f32 square_sum; + register f32 ret_sqrt; + register f32 n_0, n_1; + asm { + psq_l FP2, 0(vec1), 0, 0; + ps_mul xx_yy, FP2, FP2; + psq_l FP3, 8(vec1), 1, 0; + ps_madd xx_zz, FP3, FP3, xx_yy; + ps_sum0 square_sum, xx_zz, FP3, xx_yy; + frsqrte ret_sqrt, square_sum; + fmuls n_0, ret_sqrt, ret_sqrt; + fmuls n_1, ret_sqrt, half; + fnmsubs n_0, n_0, square_sum, three; + fmuls ret_sqrt, n_0, n_1; + ps_muls0 FP2, FP2, ret_sqrt; + psq_st FP2, 0(ret), 0, 0; + ps_muls0 FP3, FP3, ret_sqrt; + psq_st FP3, 8(ret), 1, 0; + } +#endif // clang-format on +} + +asm f32 PSVECSquareMag(register const Vec *v) { +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 0(v), 0, 0 + ps_mul f0, f0, f0 + lfs f1, 8(v) + ps_madd f1, f1, f1, f0 + ps_sum0 f1, f1, f0, f0 + blr +#endif // clang-format on +} + +f32 PSVECMag(const register Vec *v) +{ + register f32 v_xy, v_zz, square_mag; + register f32 ret_mag, n_0, n_1; + register f32 three, half, zero; +#ifdef __MWERKS__ // clang-format off + asm { + psq_l v_xy, 0(v), 0, 0 + ps_mul v_xy, v_xy, v_xy + lfs v_zz, 8(v) + ps_madd square_mag, v_zz, v_zz, v_xy + } +#endif // clang-format on + half = 0.5f; +#ifdef __MWERKS__ // clang-format off + asm { + ps_sum0 square_mag, square_mag, v_xy, v_xy + frsqrte ret_mag, square_mag + } +#endif // clang-format on + three = 3.0f; +#ifdef __MWERKS__ // clang-format off +asm { + fmuls n_0, ret_mag, ret_mag + fmuls n_1, ret_mag, half + fnmsubs n_0, n_0, square_mag, three + fmuls ret_mag, n_0, n_1 + fsel ret_mag, ret_mag, ret_mag, 
square_mag + fmuls square_mag, square_mag, ret_mag + } +#endif // clang-format on + return square_mag; +} + +asm f32 PSVECDotProduct(const register Vec *vec1, const register Vec *vec2) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l f2, 4(r3), 0, 0 /* qr0 */ + psq_l f3, 4(r4), 0, 0 /* qr0 */ + ps_mul f2, f2, f3 + psq_l f5, 0(r3), 0, 0 /* qr0 */ + psq_l f4, 0(r4), 0, 0 /* qr0 */ + ps_madd f3, f5, f4, f2 + ps_sum0 f1, f3, f2, f2 + blr +#endif // clang-format on +} + +asm void PSVECCrossProduct(register const Vec *a, register const Vec *b, register Vec *axb) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f1, 0(b), 0, 0 + lfs f2, 8(a) + psq_l f0, 0(a), 0, 0 + ps_merge10 f6, f1, f1 + lfs f3, 8(b) + ps_mul f4, f1, f2 + ps_muls0 f7, f1, f0 + ps_msub f5, f0, f3, f4 + ps_msub f8, f0, f6, f7 + ps_merge11 f9, f5, f5 + ps_merge01 f10, f5, f8 + psq_st f9, 0(axb), 1, 0 + ps_neg f10, f10 + psq_st f10, 4(axb), 0, 0 + blr +#endif // clang-format on +} + +void C_VECHalfAngle(const Vec *a, const Vec *b, Vec *half) +{ + Vec a0; + Vec b0; + Vec ab; + + a0.x = -a->x; + a0.y = -a->y; + a0.z = -a->z; + + b0.x = -b->x; + b0.y = -b->y; + b0.z = -b->z; + + VECNormalize(&a0, &a0); + VECNormalize(&b0, &b0); + VECAdd(&a0, &b0, &ab); + + if (VECDotProduct(&ab, &ab) > 0.0f) { + VECNormalize(&ab, half); + } + else { + *half = ab; + } +} + +void C_VECReflect(const Vec *src, const Vec *normal, Vec *dst) +{ + // Vec a0; + // Vec b0; + // f32 dot; + + // a0.x = -src->x; + // a0.y = -src->y; + // a0.z = -src->z; + + // VECNormalize(&a0, &a0); + // VECNormalize(normal, &b0); + + // dot = VECDotProduct(&a0, &b0); + // dst->x = b0.x * 2.0f * dot - a0.x; + // dst->y = b0.y * 2.0f * dot - a0.y; + // dst->z = b0.z * 2.0f * dot - a0.z; + + // VECNormalize(dst, dst); +} + +asm f32 PSVECSquareDistance(register const Vec *a, register const Vec *b) { +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 4(a), 0, 0 + psq_l f1, 4(b), 0, 0 + ps_sub f2, f0, f1 + psq_l f0, 0(a), 
0, 0 + psq_l f1, 0(b), 0, 0 + ps_mul f2, f2, f2 + ps_sub f0, f0, f1 + ps_madd f1, f0, f0, f2 + ps_sum0 f1, f1, f2, f2 + blr +#endif // clang-format on +} + +f32 PSVECDistance(register const Vec *a, register const Vec *b) +{ + + register f32 half_c; + register f32 three_c; + register f32 dist; + +#ifdef __MWERKS__ // clang-format off + asm { + psq_l f0, 4(a), 0, 0 /* qr0 */ + psq_l f1, 4(b), 0, 0 /* qr0 */ + ps_sub f2, f0, f1 + psq_l f0, 0(a), 0, 0 /* qr0 */ + psq_l f1, 0(b), 0, 0 /* qr0 */ + ps_mul f2, f2, f2 + ps_sub f0, f0, f1 + } + + half_c = 0.5f; + + asm { + ps_madd f0, f0, f0, f2 + ps_sum0 f0, f0, f2, f2 + } + + three_c = 3.0f; + + asm { + frsqrte dist, f0 + fmuls f2, dist, dist + fmuls dist, dist, half_c + fnmsubs f2, f2, f0, three_c + fmuls dist, f2, dist + fsel dist, dist, dist, f0 + fmuls dist, f0, dist + } + + return dist; +#endif // clang-format on +} diff --git a/src/dolphin/pad/Pad.c b/src/dolphin/pad/Pad.c new file mode 100644 index 00000000..1852cf0d --- /dev/null +++ b/src/dolphin/pad/Pad.c @@ -0,0 +1,783 @@ +#include +#include + +u8 UnkVal : (OS_BASE_CACHED | 0x30e3); +u16 __OSWirelessPadFixMode : (OS_BASE_CACHED | 0x30E0); + +static void PADTypeAndStatusCallback(s32 chan, u32 type); +static void PADOriginCallback(s32 chan, u32 error, OSContext *context); +static void PADProbeCallback(s32 chan, u32 error, OSContext *context); +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void PADTypeAndStatusCallback(s32 chan, u32 type); + +static void PADOriginCallback(s32 chan, u32 error, OSContext *context); +static void PADProbeCallback(s32 chan, u32 error, OSContext *context); + +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, 
u32 data[2]); + +static BOOL Initialized; + +static u32 EnabledBits; /* The per-channel masks in this group use PAD_CHAN0_BIT >> chan as the bit for channel chan. */ +static u32 ResettingBits; +static s32 ResettingChan = 32; /* 32 is the idle value: DoReset leaves it at 32 when ResettingBits is empty. */ +static u32 RecalibrateBits; +static u32 WaitingBits; +static u32 CheckingBits; +static u32 PendingBits; + +static u32 XPatchBits = PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT; + +static u32 AnalogMode = 0x00000300u; + +u32 __PADSpec; +static u32 Spec = 5; +static void (*MakeStatus)(s32, PADStatus *, u32[2]) = SPEC2_MakeStatus; + +static u32 Type[SI_MAX_CHAN]; +static PADStatus Origin[SI_MAX_CHAN]; + +static u32 CmdReadOrigin = 0x41 << 24; +static u32 CmdCalibrate = 0x42 << 24; +static u32 CmdProbeDevice[SI_MAX_CHAN]; + +static BOOL OnReset(BOOL final); + +static OSResetFunctionInfo ResetFunctionInfo = { OnReset, 127 }; + +static void (*SamplingCallback)(void); + +static void PADEnable(s32 chan) +{ /* Adds chan to EnabledBits, reads the channel's current SI response into a scratch buffer, programs the polling command (0x40 << 16) | AnalogMode and enables polling for every enabled channel. */ + u32 cmd; + u32 chanBit; + u32 data[2]; + + chanBit = PAD_CHAN0_BIT >> chan; + EnabledBits |= chanBit; + SIGetResponse(chan, data); + cmd = (0x40 << 16) | AnalogMode; + SISetCommand(chan, cmd); + SIEnablePolling(EnabledBits); +} + +static void PADDisable(s32 chan) +{ /* With interrupts disabled: stops polling chan, clears its bit from EnabledBits, WaitingBits, CheckingBits and PendingBits, and calls OSSetWirelessID(chan, 0). */ + BOOL enabled; + u32 chanBit; + + enabled = OSDisableInterrupts(); + + chanBit = PAD_CHAN0_BIT >> chan; + SIDisablePolling(chanBit); + EnabledBits &= ~chanBit; + WaitingBits &= ~chanBit; + CheckingBits &= ~chanBit; + PendingBits &= ~chanBit; + OSSetWirelessID(chan, 0); + + OSRestoreInterrupts(enabled); +} + +static void DoReset(void) +{ /* Takes the next channel from ResettingBits via count-leading-zeros, clears its bit, zeroes its Origin entry and starts an asynchronous type query; returns with ResettingChan == 32 when nothing is queued. */ + u32 chanBit; + + ResettingChan = __cntlzw(ResettingBits); + if (ResettingChan == 32) { + return; + } + + chanBit = PAD_CHAN0_BIT >> ResettingChan; + ResettingBits &= ~chanBit; + + memset(&Origin[ResettingChan], 0, sizeof(PADStatus)); + SIGetTypeAsync(ResettingChan, PADTypeAndStatusCallback); +} + +static void UpdateOrigin(s32 chan) +{ /* Normalizes Origin[chan]: clears the low 4 bits of the fields selected by the current AnalogMode, subtracts 128 from the four stick values, and, when the channel's XPatchBits bit is set, zeroes a stickX origin above 64 on an SI_GC_CONTROLLER type. */ + PADStatus *origin; + u32 chanBit = PAD_CHAN0_BIT >> chan; + + origin = &Origin[chan]; + switch (AnalogMode & 0x00000700u) { + case 0x00000000u: + case 0x00000500u: + case 0x00000600u: + case 
0x00000700u: + origin->triggerL &= ~15; + origin->triggerR &= ~15; + origin->analogA &= ~15; + origin->analogB &= ~15; + break; + case 0x00000100u: + origin->substickX &= ~15; + origin->substickY &= ~15; + origin->analogA &= ~15; + origin->analogB &= ~15; + break; + case 0x00000200u: + origin->substickX &= ~15; + origin->substickY &= ~15; + origin->triggerL &= ~15; + origin->triggerR &= ~15; + break; + case 0x00000300u: + break; + case 0x00000400u: + break; + } + + origin->stickX -= 128; + origin->stickY -= 128; + origin->substickX -= 128; + origin->substickY -= 128; + + if (XPatchBits & chanBit) { + if (64 < origin->stickX && (SIGetType(chan) & 0xffff0000) == SI_GC_CONTROLLER) { + origin->stickX = 0; + } + } +} + +static void PADOriginCallback(s32 chan, u32 error, OSContext *context) +{ /* Transfer completion for the read-origin/calibrate command issued during reset: on success updates the origin of ResettingChan and enables it (the chan argument is not used), then always continues with DoReset. */ + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + UpdateOrigin(ResettingChan); + PADEnable(ResettingChan); + } + DoReset(); +} + +static void PADOriginUpdateCallback(s32 chan, u32 error, OSContext *context) +{ /* Transfer completion for an origin re-read on a live channel: ignored if chan is no longer enabled; refreshes the origin when no error bit is set and disables the channel on SI_ERROR_NO_RESPONSE. */ + + if (!(EnabledBits & (PAD_CHAN0_BIT >> chan))) { + return; + } + + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + UpdateOrigin(chan); + } + + if (error & SI_ERROR_NO_RESPONSE) { + PADDisable(chan); + } +} + +static void PADProbeCallback(s32 chan, u32 error, OSContext *context) +{ /* Transfer completion for CmdProbeDevice: on success enables ResettingChan and marks it in WaitingBits, then continues with DoReset. */ + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + PADEnable(ResettingChan); + WaitingBits |= PAD_CHAN0_BIT >> ResettingChan; + } + DoReset(); +} + +static void PADTypeAndStatusCallback(s32 chan, u32 type) +{ /* SIGetTypeAsync completion for ResettingChan. The low 8 bits of type are treated as error flags and the remaining bits are stored in Type[]. Depending on the type bits it either moves on to the next channel, enables the pad directly (Spec < PAD_SPEC_2), starts a read-origin or calibrate transfer, or probes a wireless device; a failed SITransfer marks the channel in PendingBits. */ + u32 chanBit; + u32 recalibrate; + BOOL rc = TRUE; + u32 error; + chanBit = PAD_CHAN0_BIT >> ResettingChan; + error = type & 0xFF; + recalibrate = RecalibrateBits & chanBit; + RecalibrateBits &= ~chanBit; + + if (error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION)) { + DoReset(); + 
return; + } + + type &= ~0xFF; + + Type[ResettingChan] = type; + + if ((type & SI_TYPE_MASK) != SI_TYPE_GC || !(type & SI_GC_STANDARD)) { + DoReset(); + return; + } + + if (Spec < PAD_SPEC_2) { + PADEnable(ResettingChan); + DoReset(); + return; + } + + if (!(type & SI_GC_WIRELESS) || (type & SI_WIRELESS_IR)) { + if (recalibrate) { + rc = SITransfer(ResettingChan, &CmdCalibrate, 3, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + else { + rc = SITransfer(ResettingChan, &CmdReadOrigin, 1, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + } + else if ((type & SI_WIRELESS_FIX_ID) && (type & SI_WIRELESS_CONT_MASK) == SI_WIRELESS_CONT && !(type & SI_WIRELESS_LITE)) { + if (type & SI_WIRELESS_RECEIVED) { + rc = SITransfer(ResettingChan, &CmdReadOrigin, 1, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + else { + rc = SITransfer(ResettingChan, &CmdProbeDevice[ResettingChan], 3, &Origin[ResettingChan], 8, PADProbeCallback, 0); + } + } + if (!rc) { + PendingBits |= chanBit; + DoReset(); + return; + } +} + +static void PADReceiveCheckCallback(s32 chan, u32 type) +{ /* Completion of the type query that PADRead starts for a waiting channel: ignored if chan is not enabled; clears the channel's WaitingBits/CheckingBits, then re-reads the origin when there is no error and the type bits satisfy the wireless condition below, otherwise disables the channel. */ + u32 error; + u32 chanBit; + + chanBit = PAD_CHAN0_BIT >> chan; + if (!(EnabledBits & chanBit)) { + return; + } + + error = type & 0xFF; + type &= ~0xFF; + + WaitingBits &= ~chanBit; + CheckingBits &= ~chanBit; + + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION)) && (type & SI_GC_WIRELESS) + && (type & SI_WIRELESS_FIX_ID) && (type & SI_WIRELESS_RECEIVED) && !(type & SI_WIRELESS_IR) + && (type & SI_WIRELESS_CONT_MASK) == SI_WIRELESS_CONT && !(type & SI_WIRELESS_LITE)) { + SITransfer(chan, &CmdReadOrigin, 1, &Origin[chan], 10, PADOriginUpdateCallback, 0); + } + else { + PADDisable(chan); + } +} + +BOOL PADReset(u32 mask) +{ /* Queues the channels in mask, plus any in PendingBits and minus any in WaitingBits/CheckingBits, for reset; stops polling the queued channels that were enabled and starts DoReset if no reset is in progress. RecalibrateBits is only set when Spec == PAD_SPEC_4. Always returns TRUE. */ + BOOL enabled; + u32 diableBits; + + enabled = OSDisableInterrupts(); + + mask |= PendingBits; + PendingBits = 0; + mask &= ~(WaitingBits | CheckingBits); + ResettingBits |= mask; + diableBits = ResettingBits & 
EnabledBits; + EnabledBits &= ~mask; + + if (Spec == PAD_SPEC_4) { + RecalibrateBits |= mask; + } + + SIDisablePolling(diableBits); + + if (ResettingChan == 32) { + DoReset(); + } + OSRestoreInterrupts(enabled); + return TRUE; +} + +BOOL PADRecalibrate(u32 mask) +{ /* Same queuing as PADReset, except that RecalibrateBits is set for the mask unless bit 0x40 of UnkVal is set (see __PADDisableRecalibration). Always returns TRUE. */ + BOOL enabled; + u32 disableBits; + + enabled = OSDisableInterrupts(); + + mask |= PendingBits; + PendingBits = 0; + mask &= ~(WaitingBits | CheckingBits); + ResettingBits |= mask; + disableBits = ResettingBits & EnabledBits; + EnabledBits &= ~mask; + + if (!(UnkVal & 0x40)) { + RecalibrateBits |= mask; + } + + SIDisablePolling(disableBits); + if (ResettingChan == 32) { + DoReset(); + } + OSRestoreInterrupts(enabled); + return TRUE; +} + +BOOL PADInit() +{ /* One-time setup: returns TRUE immediately if already initialized; otherwise applies __PADSpec when it is nonzero, builds CmdProbeDevice for each channel from the channel number and __OSWirelessPadFixMode, refreshes the SI sampling rate, registers ResetFunctionInfo and returns the result of resetting all four channels. */ + s32 chan; + if (Initialized) { + return TRUE; + } + + if (__PADSpec) { + PADSetSpec(__PADSpec); + } + + Initialized = TRUE; + + if (__PADFixBits != 0) { + OSTime time = OSGetTime(); + __OSWirelessPadFixMode = (u16)((((time)&0xffff) + ((time >> 16) & 0xffff) + ((time >> 32) & 0xffff) + ((time >> 48) & 0xffff)) & 0x3fffu); + RecalibrateBits = PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT; + } + + for (chan = 0; chan < SI_MAX_CHAN; ++chan) { + CmdProbeDevice[chan] = (0x4D << 24) | (chan << 22) | ((__OSWirelessPadFixMode & 0x3fffu) << 8); + } + + SIRefreshSamplingRate(); + OSRegisterResetFunction(&ResetFunctionInfo); + + return PADReset(PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT); +} + +#define offsetof(type, memb) ((u32) & ((type *)0)->memb) + +u32 PADRead(PADStatus *status) +{ /* Fills status[0 .. SI_MAX_CHAN-1] with interrupts disabled. Every path that does not deliver fresh data sets err and zeroes the bytes of the entry that precede err. Returns the mask of channels that passed the response check and whose SI type lacks SI_GC_NOMOTOR. */ + BOOL enabled; + s32 chan; + u32 data[2]; + u32 chanBit; + u32 sr; + int chanShift; + u32 motor; + + enabled = OSDisableInterrupts(); + + motor = 0; + for (chan = 0; chan < SI_MAX_CHAN; chan++, status++) { + chanBit = PAD_CHAN0_BIT >> chan; + chanShift = 8 * (SI_MAX_CHAN - 1 - chan); + + if (PendingBits & chanBit) { + PADReset(0); + status->err = PAD_ERR_NOT_READY; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if 
((ResettingBits & chanBit) || ResettingChan == chan) { + status->err = PAD_ERR_NOT_READY; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (!(EnabledBits & chanBit)) { + status->err = (s8)PAD_ERR_NO_CONTROLLER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (SIIsChanBusy(chan)) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + sr = SIGetStatus(chan); + if (sr & SI_ERROR_NO_RESPONSE) { + SIGetResponse(chan, data); + + if (WaitingBits & chanBit) { /* A waiting channel that did not respond reports PAD_ERR_NONE with zeroed data and gets at most one outstanding type re-check. */ + status->err = (s8)PAD_ERR_NONE; + memset(status, 0, offsetof(PADStatus, err)); + + if (!(CheckingBits & chanBit)) { + CheckingBits |= chanBit; + SIGetTypeAsync(chan, PADReceiveCheckCallback); + } + continue; + } + + PADDisable(chan); + + status->err = (s8)PAD_ERR_NO_CONTROLLER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (!(SIGetType(chan) & SI_GC_NOMOTOR)) { + motor |= chanBit; + } + + if (!SIGetResponse(chan, data)) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (data[0] & 0x80000000) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + MakeStatus(chan, status, data); + + // Check and clear PAD_ORIGIN bit + if (status->button & 0x2000) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + + // Get origin. It is okay if the following transfer fails + // since the PAD_ORIGIN bit remains until the read origin + // command completes. 
+ SITransfer(chan, &CmdReadOrigin, 1, &Origin[chan], 10, PADOriginUpdateCallback, 0); + continue; + } + + status->err = PAD_ERR_NONE; + + // Clear PAD_INTERFERE bit + status->button &= ~0x0080; + } + + OSRestoreInterrupts(enabled); + return motor; +} + +void PADControlAllMotors(const u32 *commandArray) +{ /* For each enabled channel whose type lacks SI_GC_NOMOTOR, programs the polling command with the low two bits of commandArray[chan] (PAD_MOTOR_STOP_HARD becomes PAD_MOTOR_STOP when Spec < PAD_SPEC_2), then calls SITransferCommands once if any command was set. */ + BOOL enabled; + int chan; + u32 command; + BOOL commit; + u32 chanBit; + + enabled = OSDisableInterrupts(); + commit = FALSE; + for (chan = 0; chan < SI_MAX_CHAN; chan++, commandArray++) { + chanBit = PAD_CHAN0_BIT >> chan; + if ((EnabledBits & chanBit) && !(SIGetType(chan) & SI_GC_NOMOTOR)) { + command = *commandArray; + if (Spec < PAD_SPEC_2 && command == PAD_MOTOR_STOP_HARD) { + command = PAD_MOTOR_STOP; + } + + SISetCommand(chan, (0x40 << 16) | AnalogMode | (command & (0x00000001 | 0x00000002))); + commit = TRUE; + } + } + if (commit) { + SITransferCommands(); + } + OSRestoreInterrupts(enabled); +} + +void PADControlMotor(s32 chan, u32 command) +{ /* Single-channel motor update: same enable/NOMOTOR check and command encoding as PADControlAllMotors, followed by SITransferCommands. */ + BOOL enabled; + u32 chanBit; + + enabled = OSDisableInterrupts(); + chanBit = PAD_CHAN0_BIT >> chan; + if ((EnabledBits & chanBit) && !(SIGetType(chan) & SI_GC_NOMOTOR)) { + if (Spec < PAD_SPEC_2 && command == PAD_MOTOR_STOP_HARD) { + command = PAD_MOTOR_STOP; + } + + SISetCommand(chan, (0x40 << 16) | AnalogMode | (command & (0x00000001 | 0x00000002))); + SITransferCommands(); + } + OSRestoreInterrupts(enabled); +} + +void PADSetSpec(u32 spec) +{ /* Clears __PADSpec, selects the MakeStatus decoder that matches spec (a value not listed in the switch leaves MakeStatus unchanged) and stores spec in Spec. */ + __PADSpec = 0; + switch (spec) { + case PAD_SPEC_0: + MakeStatus = SPEC0_MakeStatus; + break; + case PAD_SPEC_1: + MakeStatus = SPEC1_MakeStatus; + break; + case PAD_SPEC_2: + case PAD_SPEC_3: + case PAD_SPEC_4: + case PAD_SPEC_5: + MakeStatus = SPEC2_MakeStatus; + break; + } + Spec = spec; +} + +u32 PADGetSpec(void) +{ /* Returns the value last stored by PADSetSpec (initially 5). */ + return Spec; +} + +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ /* PAD_SPEC_0 decoder: five buttons from fixed bits of data[0] >> 16, stick values from the bytes of data[1], triggers from the two low bytes of data[0]; the L/R button flags are set when the trigger value is at least 170, and the sticks are recentered by subtracting 128. chan is not used. */ + status->button = 0; + status->button |= ((data[0] >> 16) & 0x0008) ? PAD_BUTTON_A : 0; + status->button |= ((data[0] >> 16) & 0x0020) ? 
PAD_BUTTON_B : 0; + status->button |= ((data[0] >> 16) & 0x0100) ? PAD_BUTTON_X : 0; + status->button |= ((data[0] >> 16) & 0x0001) ? PAD_BUTTON_Y : 0; + status->button |= ((data[0] >> 16) & 0x0010) ? PAD_BUTTON_START : 0; + status->stickX = (s8)(data[1] >> 16); + status->stickY = (s8)(data[1] >> 24); + status->substickX = (s8)(data[1]); + status->substickY = (s8)(data[1] >> 8); + status->triggerL = (u8)(data[0] >> 8); + status->triggerR = (u8)data[0]; + status->analogA = 0; + status->analogB = 0; + if (170 <= status->triggerL) { + status->button |= PAD_TRIGGER_L; + } + if (170 <= status->triggerR) { + status->button |= PAD_TRIGGER_R; + } + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; +} + +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ /* PAD_SPEC_1 decoder: same field layout and trigger threshold as SPEC0_MakeStatus, with different button bit positions in data[0] >> 16. */ + + status->button = 0; + status->button |= ((data[0] >> 16) & 0x0080) ? PAD_BUTTON_A : 0; + status->button |= ((data[0] >> 16) & 0x0100) ? PAD_BUTTON_B : 0; + status->button |= ((data[0] >> 16) & 0x0020) ? PAD_BUTTON_X : 0; + status->button |= ((data[0] >> 16) & 0x0010) ? PAD_BUTTON_Y : 0; + status->button |= ((data[0] >> 16) & 0x0200) ? 
PAD_BUTTON_START : 0; + + status->stickX = (s8)(data[1] >> 16); + status->stickY = (s8)(data[1] >> 24); + status->substickX = (s8)(data[1]); + status->substickY = (s8)(data[1] >> 8); + + status->triggerL = (u8)(data[0] >> 8); + status->triggerR = (u8)data[0]; + + status->analogA = 0; + status->analogB = 0; + + if (170 <= status->triggerL) { + status->button |= PAD_TRIGGER_L; + } + if (170 <= status->triggerR) { + status->button |= PAD_TRIGGER_R; + } + + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; +} + +static s8 ClampS8(s8 var, s8 org) +{ /* Returns var - org after first limiting var so that the difference stays inside the s8 range [-128, 127]. */ + if (0 < org) { + s8 min = (s8)(-128 + org); + if (var < min) { + var = min; + } + } + else if (org < 0) { + s8 max = (s8)(127 + org); + if (max < var) { + var = max; + } + } + return var -= org; +} + +static u8 ClampU8(u8 var, u8 org) +{ /* Returns var - org, or 0 when var is below org. */ + if (var < org) { + var = org; + } + return var -= org; +} + +#define PAD_ALL \ + (PAD_BUTTON_LEFT | PAD_BUTTON_RIGHT | PAD_BUTTON_DOWN | PAD_BUTTON_UP | PAD_TRIGGER_Z | PAD_TRIGGER_R | PAD_TRIGGER_L | PAD_BUTTON_A \ + | PAD_BUTTON_B | PAD_BUTTON_X | PAD_BUTTON_Y | PAD_BUTTON_MENU | 0x2000 | 0x0080) + +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ /* Decoder installed for PAD_SPEC_2 through PAD_SPEC_5: buttons are data[0] >> 16 masked with PAD_ALL, the main stick comes from the low bytes of data[0], and the remaining fields are unpacked from data[1] according to AnalogMode (4-bit fields are shifted up by 4). Sticks are recentered by subtracting 128, then sticks and triggers are made relative to Origin[chan]; analogA/analogB are not origin-adjusted. */ + PADStatus *origin; + + status->button = (u16)((data[0] >> 16) & PAD_ALL); + status->stickX = (s8)(data[0] >> 8); + status->stickY = (s8)(data[0]); + + switch (AnalogMode & 0x00000700) { + case 0x00000000: + case 0x00000500: + case 0x00000600: + case 0x00000700: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = (u8)(((data[1] >> 12) & 0x0f) << 4); + status->triggerR = (u8)(((data[1] >> 8) & 0x0f) << 4); + status->analogA = (u8)(((data[1] >> 4) & 0x0f) << 4); + status->analogB = (u8)(((data[1] >> 0) & 0x0f) << 4); + break; + case 0x00000100: + status->substickX = (s8)(((data[1] >> 28) & 0x0f) << 4); + status->substickY = (s8)(((data[1] >> 24) & 0x0f) << 4); + status->triggerL = (u8)(data[1] >> 16); + 
status->triggerR = (u8)(data[1] >> 8); + status->analogA = (u8)(((data[1] >> 4) & 0x0f) << 4); + status->analogB = (u8)(((data[1] >> 0) & 0x0f) << 4); + break; + case 0x00000200: + status->substickX = (s8)(((data[1] >> 28) & 0x0f) << 4); + status->substickY = (s8)(((data[1] >> 24) & 0x0f) << 4); + status->triggerL = (u8)(((data[1] >> 20) & 0x0f) << 4); + status->triggerR = (u8)(((data[1] >> 16) & 0x0f) << 4); + status->analogA = (u8)(data[1] >> 8); + status->analogB = (u8)(data[1] >> 0); + break; + case 0x00000300: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = (u8)(data[1] >> 8); + status->triggerR = (u8)(data[1] >> 0); + status->analogA = 0; + status->analogB = 0; + break; + case 0x00000400: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = 0; + status->triggerR = 0; + status->analogA = (u8)(data[1] >> 8); + status->analogB = (u8)(data[1] >> 0); + break; + } + + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; + + origin = &Origin[chan]; + status->stickX = ClampS8(status->stickX, origin->stickX); + status->stickY = ClampS8(status->stickY, origin->stickY); + status->substickX = ClampS8(status->substickX, origin->substickX); + status->substickY = ClampS8(status->substickY, origin->substickY); + status->triggerL = ClampU8(status->triggerL, origin->triggerL); + status->triggerR = ClampU8(status->triggerR, origin->triggerR); +} + +BOOL PADGetType(s32 chan, u32 *type) +{ /* Stores SIGetType(chan) in *type; returns FALSE while the channel is queued for reset, is the channel currently being reset, or is not enabled, and TRUE otherwise. */ + u32 chanBit; + + *type = SIGetType(chan); + chanBit = PAD_CHAN0_BIT >> chan; + if ((ResettingBits & chanBit) || ResettingChan == chan || !(EnabledBits & chanBit)) { + return FALSE; + } + return TRUE; +} + +BOOL PADSync(void) +{ /* True when no reset is queued or in progress and SIBusy() reports idle. */ + return ResettingBits == 0 && ResettingChan == 32 && !SIBusy(); +} + +void PADSetAnalogMode(u32 mode) +{ /* Stores mode << 8 in AnalogMode, then removes every currently enabled channel from EnabledBits, WaitingBits and CheckingBits and stops polling them. NOTE(review): nothing here re-enables the channels; presumably the caller is expected to reset them afterwards - confirm. */ + BOOL enabled; + u32 mask; + + enabled = OSDisableInterrupts(); + AnalogMode = mode << 8; + mask = 
EnabledBits; + + EnabledBits &= ~mask; + WaitingBits &= ~mask; + CheckingBits &= ~mask; + + SIDisablePolling(mask); + OSRestoreInterrupts(enabled); +} + +static BOOL OnReset(BOOL f) +{ /* Reset function registered through ResetFunctionInfo. Removes any sampling callback. When f is FALSE: the first time PADSync() holds it starts PADRecalibrate on all four channels and returns FALSE; otherwise it returns PADSync(). When f is TRUE it clears the recalibrated flag and returns TRUE. */ + static BOOL recalibrated = FALSE; + BOOL sync; + + if (SamplingCallback) { + PADSetSamplingCallback(NULL); + } + + if (!f) { + sync = PADSync(); + if (!recalibrated && sync) { + recalibrated = PADRecalibrate(PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT); + return FALSE; + } + return sync; + } + else { + recalibrated = FALSE; + } + + return TRUE; +} + +void __PADDisableXPatch(void) +{ /* Clears XPatchBits, so UpdateOrigin no longer applies its stickX patch on any channel. */ + XPatchBits = 0; +} + +static void SamplingHandler(__OSInterrupt interrupt, OSContext *context) +{ /* Polling handler: if a sampling callback is installed, runs it with a cleared temporary OSContext as the current context, then makes the passed-in context current again. */ + OSContext exceptionContext; + + if (SamplingCallback) { + OSClearContext(&exceptionContext); + OSSetCurrentContext(&exceptionContext); + SamplingCallback(); + OSClearContext(&exceptionContext); + OSSetCurrentContext(context); + } +} + +PADSamplingCallback PADSetSamplingCallback(PADSamplingCallback callback) +{ /* Installs callback as SamplingCallback and registers SamplingHandler as a polling handler, or unregisters it when callback is NULL; returns the previously installed callback. */ + PADSamplingCallback prev; + + prev = SamplingCallback; + SamplingCallback = callback; + if (callback) { + SIRegisterPollingHandler(SamplingHandler); + } + else { + SIUnregisterPollingHandler(SamplingHandler); + } + return prev; +} + +BOOL __PADDisableRecalibration(BOOL disable) +{ /* With interrupts disabled, sets bit 0x40 of UnkVal when disable is nonzero and clears it otherwise; returns whether the bit was set beforehand. */ + BOOL enabled; + BOOL prev; + + enabled = OSDisableInterrupts(); + prev = (UnkVal & 0x40) ? 
TRUE : FALSE; + UnkVal &= (u8)~0x40; + if (disable) { + UnkVal |= 0x40; + } + OSRestoreInterrupts(enabled); + return prev; +} diff --git a/src/dolphin/pad/Padclamp.c b/src/dolphin/pad/Padclamp.c new file mode 100644 index 00000000..a96d75c4 --- /dev/null +++ b/src/dolphin/pad/Padclamp.c @@ -0,0 +1,119 @@ +#include + +#include + +typedef struct PADClampRegion { + u8 minTrigger; + u8 maxTrigger; + s8 minStick; + s8 maxStick; + s8 xyStick; + s8 minSubstick; + s8 maxSubstick; + s8 xySubstick; +} PADClampRegion; + +static PADClampRegion ClampRegion = { + // Triggers + 30, + 180, + + // Left stick + 15, + 72, + 40, + + // Right stick + 15, + 59, + 31, +}; + +static void ClampStick(s8 *px, s8 *py, s8 max, s8 xy, s8 min) +{ /* Clamps the stick vector (*px, *py) in place: works on magnitudes and restores the signs at the end, removes a dead zone of min from each axis, and, if the point lies beyond the boundary that runs from max on the nearer axis to (xy, xy) on the diagonal, scales both coordinates back onto that boundary. */ + int x = *px; + int y = *py; + int signX; + int signY; + int d; + + if (0 <= x) { + signX = 1; + } + else { + signX = -1; + x = -x; + } + + if (0 <= y) { + signY = 1; + } + else { + signY = -1; + y = -y; + } + + if (x <= min) { + x = 0; + } + else { + x -= min; + } + if (y <= min) { + y = 0; + } + else { + y -= min; + } + + if (x == 0 && y == 0) { + *px = *py = 0; + return; + } + + if (xy * y <= xy * x) { + d = xy * x + (max - xy) * y; + if (xy * max < d) { + x = (s8)(xy * max * x / d); + y = (s8)(xy * max * y / d); + } + } + else { + d = xy * y + (max - xy) * x; + if (xy * max < d) { + x = (s8)(xy * max * x / d); + y = (s8)(xy * max * y / d); + } + } + + *px = (s8)(signX * x); + *py = (s8)(signY * y); +} + +static void ClampTrigger(u8 *trigger, u8 min, u8 max) +{ /* Values at or below min become 0; any other value is capped at max and then reduced by min. */ + if (*trigger <= min) { + *trigger = 0; + } + else { + if (max < *trigger) { + *trigger = max; + } + *trigger -= min; + } +} + +void PADClamp(PADStatus *status) +{ /* NOTE(review): the whole body below is commented out, so this function currently does nothing and ClampStick, ClampTrigger and ClampRegion are unused in this file - confirm this is intentional. */ + // int i; + // for (i = 0; i < PAD_CHANMAX; i++, status++) { + // if (status->err != PAD_ERR_NONE) { + // continue; + // } + + // ClampStick(&status->stickX, &status->stickY, ClampRegion.maxStick, ClampRegion.xyStick, ClampRegion.minStick); + // ClampStick(&status->substickX, &status->substickY, ClampRegion.maxSubstick, 
ClampRegion.xySubstick, ClampRegion.minSubstick); + // ClampTrigger(&status->triggerL, ClampRegion.minTrigger, ClampRegion.maxTrigger); + // ClampTrigger(&status->triggerR, ClampRegion.minTrigger, ClampRegion.maxTrigger); + // } +}