diff --git a/config/GMPE01_00/rels/m420dll/symbols.txt b/config/GMPE01_00/rels/m420dll/symbols.txt index 8e6b2a20..365355d3 100644 --- a/config/GMPE01_00/rels/m420dll/symbols.txt +++ b/config/GMPE01_00/rels/m420dll/symbols.txt @@ -207,9 +207,9 @@ lbl_1_data_0 = .data:0x00000000; // type:object size:0x1E scope:local data:strin lbl_1_data_1E = .data:0x0000001E; // type:object size:0x1E scope:local data:string lbl_1_data_3C = .data:0x0000003C; // type:object size:0x11 scope:local data:string lbl_1_data_50 = .data:0x00000050; // type:object size:0x80 -lbl_1_data_D0 = .data:0x000000D0; // type:object size:0x1B data:string +lbl_1_data_D0 = .data:0x000000D0; // type:object size:0x1B scope:local data:string lbl_1_data_EC = .data:0x000000EC; // type:object size:0x30 -lbl_1_data_11C = .data:0x0000011C; // type:object size:0x19 data:string +lbl_1_data_11C = .data:0x0000011C; // type:object size:0x19 scope:local data:string lbl_1_data_135 = .data:0x00000135; // type:object size:0x1 lbl_1_data_138 = .data:0x00000138; // type:object size:0x1C lbl_1_data_154 = .data:0x00000154; // type:object size:0x10 data:4byte diff --git a/config/GMPE01_00/splits.txt b/config/GMPE01_00/splits.txt index 7eefd3c3..6b38187f 100644 --- a/config/GMPE01_00/splits.txt +++ b/config/GMPE01_00/splits.txt @@ -680,12 +680,12 @@ dolphin/mtx/vec.c: .sdata2 start:0x801D6380 end:0x801D6390 dolphin/mtx/quat.c: - .text start:0x800BC2C4 end:0x800BC710 + .text start:0x800BC2C4 end:0x800BC884 .rodata start:0x8011E430 end:0x8011E440 .sdata2 start:0x801D6390 end:0x801D63B8 dolphin/mtx/psmtx.c: - .text start:0x800BC710 end:0x800BC9E8 + .text start:0x800BC884 end:0x800BC9E8 dolphin/dvd/dvdlow.c: .text start:0x800BC9E8 end:0x800BD83C @@ -742,7 +742,7 @@ dolphin/demo/DEMOPuts.c: dolphin/demo/DEMOStats.c: .text start:0x800C2F84 end:0x800C3CDC - .data start:0x8013D5E0 end:0x8013D748 + .data start:0x8013D5E0 end:0x8013D738 .sdata start:0x801D3908 end:0x801D3910 .sbss start:0x801D4488 end:0x801D44E8 .sdata2 start:0x801D63D8 
end:0x801D63E8 @@ -753,6 +753,7 @@ dolphin/pad/Padclamp.c: dolphin/pad/Pad.c: .text start:0x800C3F14 end:0x800C59DC + .data start:0x8013D738 end:0x8013D748 .bss start:0x801A61B0 end:0x801A6200 .sdata start:0x801D3918 end:0x801D3938 .sbss start:0x801D44E8 end:0x801D4510 diff --git a/config/GMPE01_00/symbols.txt b/config/GMPE01_00/symbols.txt index 1c59588e..99198763 100644 --- a/config/GMPE01_00/symbols.txt +++ b/config/GMPE01_00/symbols.txt @@ -4345,10 +4345,10 @@ lbl_8013C2F0 = .data:0x8013C2F0; // type:object size:0x78 YearDays = .data:0x8013C368; // type:object size:0x30 scope:local LeapYearDays = .data:0x8013C398; // type:object size:0x30 scope:local lbl_8013C3C8 = .data:0x8013C3C8; // type:object size:0x18 scope:local data:string -lbl_8013C3E0 = .data:0x8013C3E0; // type:object size:0xC8 data:string -lbl_8013C4A8 = .data:0x8013C4A8; // type:object size:0x38 -lbl_8013C4E0 = .data:0x8013C4E0; // type:object size:0x34 data:string -lbl_8013C514 = .data:0x8013C514; // type:object size:0x34 +lbl_8013C3E0 = .data:0x8013C3E0; // type:object size:0xC8 scope:local data:string +lbl_8013C4A8 = .data:0x8013C4A8; // type:object size:0x38 scope:local +lbl_8013C4E0 = .data:0x8013C4E0; // type:object size:0x34 scope:local data:string +lbl_8013C514 = .data:0x8013C514; // type:object size:0x34 scope:local @13 = .data:0x8013C548; // type:object size:0x15 scope:local data:string @293 = .data:0x8013C594; // type:object size:0x20 scope:local jumptable_8013C5B4 = .data:0x8013C5B4; // type:object size:0x40 scope:local @@ -4373,7 +4373,7 @@ jumptable_8013D698 = .data:0x8013D698; // type:object size:0x28 scope:local jumptable_8013D6C0 = .data:0x8013D6C0; // type:object size:0x28 scope:local jumptable_8013D6E8 = .data:0x8013D6E8; // type:object size:0x28 scope:local jumptable_8013D710 = .data:0x8013D710; // type:object size:0x28 scope:local -lbl_8013D738 = .data:0x8013D738; // type:object size:0x10 +ResetFunctionInfo = .data:0x8013D738; // type:object size:0x10 lbl_8013D748 = 
.data:0x8013D748; // type:object size:0x38 lbl_8013D780 = .data:0x8013D780; // type:object size:0x140 jumptable_8013D8C0 = .data:0x8013D8C0; // type:object size:0x68 scope:local @@ -4976,8 +4976,8 @@ __OSArenaLo = .sdata:0x801D38C0; // type:object size:0x4 scope:local data:4byte @32 = .sdata:0x801D38C8; // type:object size:0x2 scope:local data:string fontEncode$80 = .sdata:0x801D38D0; // type:object size:0x8 scope:local data:2byte Unit01 = .sdata:0x801D38D8; // type:object size:0x8 -FirstRead = .sdata:0x801D38E0; // type:object size:0x8 scope:local data:4byte -lbl_801D38E8 = .sdata:0x801D38E8; // type:object size:0x8 data:string +FirstRead = .sdata:0x801D38E0; // type:object size:0x4 scope:local data:4byte +lbl_801D38E8 = .sdata:0x801D38E8; // type:object size:0x8 scope:local data:string autoInvalidation = .sdata:0x801D38F0; // type:object size:0x4 scope:local data:4byte @35 = .sdata:0x801D38F8; // type:object size:0x2 scope:local data:string @40 = .sdata:0x801D38FC; // type:object size:0x4 scope:local data:string @@ -7284,21 +7284,21 @@ lbl_801D6338 = .sdata2:0x801D6338; // type:object size:0x4 data:float lbl_801D633C = .sdata2:0x801D633C; // type:object size:0x4 data:float lbl_801D6340 = .sdata2:0x801D6340; // type:object size:0x8 data:double lbl_801D6348 = .sdata2:0x801D6348; // type:object size:0x8 data:float -lbl_801D6350 = .sdata2:0x801D6350; // type:object size:0x4 data:float -lbl_801D6354 = .sdata2:0x801D6354; // type:object size:0x4 data:float -lbl_801D6358 = .sdata2:0x801D6358; // type:object size:0x4 data:float -lbl_801D635C = .sdata2:0x801D635C; // type:object size:0x4 data:float -lbl_801D6360 = .sdata2:0x801D6360; // type:object size:0x8 data:float -lbl_801D6368 = .sdata2:0x801D6368; // type:object size:0x4 data:float -lbl_801D636C = .sdata2:0x801D636C; // type:object size:0x4 data:float -lbl_801D6370 = .sdata2:0x801D6370; // type:object size:0x4 data:float -lbl_801D6374 = .sdata2:0x801D6374; // type:object size:0x4 data:float -lbl_801D6378 = 
.sdata2:0x801D6378; // type:object size:0x4 data:float -lbl_801D637C = .sdata2:0x801D637C; // type:object size:0x4 data:float -lbl_801D6380 = .sdata2:0x801D6380; // type:object size:0x4 data:float -lbl_801D6384 = .sdata2:0x801D6384; // type:object size:0x4 data:float -lbl_801D6388 = .sdata2:0x801D6388; // type:object size:0x4 data:float -lbl_801D638C = .sdata2:0x801D638C; // type:object size:0x4 data:float +lbl_801D6350 = .sdata2:0x801D6350; // type:object size:0x4 scope:local data:float +lbl_801D6354 = .sdata2:0x801D6354; // type:object size:0x4 scope:local data:float +lbl_801D6358 = .sdata2:0x801D6358; // type:object size:0x4 scope:local data:float +lbl_801D635C = .sdata2:0x801D635C; // type:object size:0x4 scope:local data:float +lbl_801D6360 = .sdata2:0x801D6360; // type:object size:0x8 scope:local data:float +lbl_801D6368 = .sdata2:0x801D6368; // type:object size:0x4 scope:local data:float +lbl_801D636C = .sdata2:0x801D636C; // type:object size:0x4 scope:local data:float +lbl_801D6370 = .sdata2:0x801D6370; // type:object size:0x4 scope:local data:float +lbl_801D6374 = .sdata2:0x801D6374; // type:object size:0x4 scope:local data:float +lbl_801D6378 = .sdata2:0x801D6378; // type:object size:0x4 scope:local data:float +lbl_801D637C = .sdata2:0x801D637C; // type:object size:0x4 scope:local data:float +lbl_801D6380 = .sdata2:0x801D6380; // type:object size:0x4 scope:local data:float +lbl_801D6384 = .sdata2:0x801D6384; // type:object size:0x4 scope:local data:float +lbl_801D6388 = .sdata2:0x801D6388; // type:object size:0x4 scope:local data:float +lbl_801D638C = .sdata2:0x801D638C; // type:object size:0x4 scope:local data:float lbl_801D6390 = .sdata2:0x801D6390; // type:object size:0x4 data:float lbl_801D6394 = .sdata2:0x801D6394; // type:object size:0x4 data:float lbl_801D6398 = .sdata2:0x801D6398; // type:object size:0x8 data:double diff --git a/configure.py b/configure.py index 67f32c23..cc6076bf 100644 --- a/configure.py +++ b/configure.py @@ -484,11 +484,11 @@ 
config.libs = [ "mtx", [ Object(NonMatching, "dolphin/mtx/mtx.c"), - Object(NonMatching, "dolphin/mtx/mtxvec.c"), - Object(NonMatching, "dolphin/mtx/mtx44.c"), + Object(Matching, "dolphin/mtx/mtxvec.c"), + Object(Matching, "dolphin/mtx/mtx44.c"), Object(NonMatching, "dolphin/mtx/vec.c"), Object(NonMatching, "dolphin/mtx/quat.c"), - Object(NonMatching, "dolphin/mtx/psmtx.c"), + Object(Matching, "dolphin/mtx/psmtx.c"), ], ), DolphinLib( @@ -521,7 +521,7 @@ config.libs = [ "pad", [ Object(NonMatching, "dolphin/pad/Padclamp.c"), - Object(NonMatching, "dolphin/pad/Pad.c"), + Object(Matching, "dolphin/pad/Pad.c"), ], ), DolphinLib( diff --git a/include/dolphin/os/OSAlarm.h b/include/dolphin/os/OSAlarm.h index a0c9d380..497b4a44 100644 --- a/include/dolphin/os/OSAlarm.h +++ b/include/dolphin/os/OSAlarm.h @@ -1,6 +1,7 @@ #ifndef _DOLPHIN_OSALARM #define _DOLPHIN_OSALARM +#include #include #include diff --git a/src/dolphin/dvd/dvd.c b/src/dolphin/dvd/dvd.c new file mode 100644 index 00000000..9253df47 --- /dev/null +++ b/src/dolphin/dvd/dvd.c @@ -0,0 +1,1466 @@ +#include "dolphin/os/OSAlarm.h" +#include +#include +#include +#include +#include + +typedef void (*stateFunc)(DVDCommandBlock *block); +stateFunc LastState; + +extern OSThreadQueue __DVDThreadQueue; + +static u8 tmpBuffer[0x80] ATTRIBUTE_ALIGN(32); +static DVDCommandBlock *executing; +static DVDDiskID *currID; +static OSBootInfo *bootInfo; +static BOOL autoInvalidation = TRUE; +static volatile BOOL PauseFlag = FALSE; +static volatile BOOL PausingFlag = FALSE; +static volatile BOOL AutoFinishing = FALSE; +static volatile BOOL FatalErrorFlag = FALSE; +static vu32 CurrCommand; +static vu32 Canceling = FALSE; +static DVDCBCallback CancelCallback; +static vu32 ResumeFromHere = 0; +static vu32 CancelLastError; +static vu32 LastError; +static vs32 NumInternalRetry = 0; +static volatile BOOL ResetRequired; +static volatile BOOL CancelAllSyncComplete = FALSE; +static volatile BOOL FirstTimeInBootrom = FALSE; + +static 
DVDCommandBlock DummyCommandBlock; +static OSAlarm ResetAlarm; + +static BOOL DVDInitialized = FALSE; + +/* States */ +static void stateReadingFST(); +static void stateTimeout(); +static void stateGettingError(); +static void stateGoToRetry(); +static void stateCheckID(); +static void stateCheckID3(); +static void stateCheckID2a(); +static void stateCheckID2(); +static void stateCoverClosed(); +static void stateCoverClosed_CMD(); +static void stateCoverOpen(); +static void stateMotorStopped(); +static void stateReady(); +static void stateBusy(); + +/* Callbacks */ +static void cbForStateReadingFST(u32 intType); +static void cbForStateError(u32 intType); +static void cbForStateGettingError(u32 intType); +static void cbForUnrecoveredError(u32 intType); +static void cbForUnrecoveredErrorRetry(u32 intType); +static void cbForStateGoToRetry(u32 intType); +static void cbForStateCheckID2a(u32 intType); +static void cbForStateCheckID1(u32 intType); +static void cbForStateCheckID2(u32 intType); +static void cbForStateCheckID3(u32 intType); +static void cbForStateCoverClosed(u32 intType); +static void cbForStateMotorStopped(u32 intType); +static void cbForStateBusy(u32 intType); +static void cbForCancelStreamSync(s32 result, DVDCommandBlock *block); +static void cbForCancelSync(s32 result, DVDCommandBlock *block); +static void cbForCancelAllSync(s32 result, DVDCommandBlock *block); + +static void defaultOptionalCommandChecker(DVDCommandBlock *block, DVDLowCallback cb); + +static DVDOptionalCommandChecker checkOptionalCommand = defaultOptionalCommandChecker; + +extern void __DVDInterruptHandler(__OSInterrupt interrupt, OSContext *context); + /* No-op checker: the body is empty; it is the initial value of checkOptionalCommand. */ +static void defaultOptionalCommandChecker(DVDCommandBlock *block, DVDLowCallback cb) { } + /* DVDInit: runs its setup sequence only once; the DVDInitialized flag makes any later call return immediately. */ +void DVDInit() +{ + if (DVDInitialized) { + return; + } + + OSInitAlarm(); + DVDInitialized = TRUE; + __DVDFSInit(); + __DVDClearWaitingQueue(); + __DVDInitWA(); + bootInfo = (OSBootInfo *)OSPhysicalToCached(0x0000); + currID = 
&(bootInfo->DVDDiskID); + __OSSetInterruptHandler(21, __DVDInterruptHandler); + __OSUnmaskInterrupts(0x400); + OSInitThreadQueue(&__DVDThreadQueue); + __DIRegs[DI_STATUS] = 42; + __DIRegs[DI_COVER_STATUS] = 0; + if (bootInfo->magic == OS_BOOTINFO_MAGIC_JTAG) { + OSReport("app booted via JTAG\n"); + OSReport("load fst\n"); + __fstLoad(); + } + else if (bootInfo->magic == 0xD15EA5E) { + OSReport("app booted from bootrom\n"); + } + else { + FirstTimeInBootrom = TRUE; + OSReport("bootrom\n"); + } +} + /* Records itself in LastState, then starts a DVDLowRead into bootInfo->FSTLocation; the length (rounded by OSRoundUp32B) and the offset are words 2 and 1 of tmpBuffer. */ +static void stateReadingFST() +{ + LastState = (stateFunc)stateReadingFST; + + DVDLowRead(bootInfo->FSTLocation, OSRoundUp32B(((u32 *)tmpBuffer)[2]), ((u32 *)tmpBuffer)[1], cbForStateReadingFST); +} + /* Completion callback for stateReadingFST: intType 0x10 goes to stateTimeout; bit 0 set finishes the executing block (state 0, callback result 0) and calls stateReady; anything else goes to stateGettingError. */ +static void cbForStateReadingFST(u32 intType) +{ + DVDCommandBlock *finished; + if (intType == 0x10) { + executing->state = -1; + stateTimeout(); + } + else if ((intType & 1) != 0) { + NumInternalRetry = 0; + finished = executing; + executing = &DummyCommandBlock; + finished->state = 0; + if (finished->callback) { + (finished->callback)(0, finished); + } + stateReady(); + } + else { + stateGettingError(); + } +} + /* Stores the error code, then stops the motor with cbForStateError as the completion callback. */ +inline static void stateError(u32 error) +{ + __DVDStoreErrorCode(error); + DVDLowStopMotor(cbForStateError); +} + /* Sets FatalErrorFlag, reports -1 to the executing block's callback, clears a pending cancel (calling CancelCallback with 0) and calls stateReady; intType 16 is routed to stateTimeout instead. */ +static void cbForStateError(u32 intType) +{ + DVDCommandBlock *finished; + + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + FatalErrorFlag = TRUE; + finished = executing; + executing = &DummyCommandBlock; + if (finished->callback) { + (finished->callback)(-1, finished); + } + + if (Canceling) { + Canceling = FALSE; + if (CancelCallback) + (CancelCallback)(0, finished); + } + + stateReady(); + + return; +} + /* Stores error code 0x1234568, calls DVDReset and then runs cbForStateError(0) directly. */ +static void stateTimeout() +{ + __DVDStoreErrorCode(0x1234568); + DVDReset(); + cbForStateError(0); +} + /* Requests the drive error; the reply is handled by cbForStateGettingError. */ +static void stateGettingError() +{ + DVDLowRequestError(cbForStateGettingError); +} + /* Returns an error category in the range 0..3; may update LastError and NumInternalRetry as a side effect. */ +static u32 CategorizeError(u32 error) +{ + if (error == 0x20400) { + LastError = error; + return 1; + } + + error &= 
0xffffff; + + if ((error == 0x62800) || (error == 0x23a00) || (error == 0xb5a01)) { + return 0; + } + + ++NumInternalRetry; + if (NumInternalRetry == 2) { + if (error == LastError) { + LastError = error; + return 1; + } + else { + LastError = error; + return 2; + } + } + else { + LastError = error; + + if ((error == 0x31100) || (executing->command == 5)) { + return 2; + } + else { + return 3; + } + } +} + /* If Canceling is set: stores resume in ResumeFromHere, finishes the executing block with state 10 and callback result -3, calls CancelCallback with 0, calls stateReady and returns TRUE. Returns FALSE when no cancel is pending. */ +inline static BOOL CheckCancel(u32 resume) +{ + DVDCommandBlock *finished; + + if (Canceling) { + ResumeFromHere = resume; + Canceling = FALSE; + + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 10; + if (finished->callback) + (*finished->callback)(-3, finished); + if (CancelCallback) + (CancelCallback)(0, finished); + stateReady(); + return TRUE; + } + return FALSE; +} + /* Callback for the error request: reads the error word from __DIRegs[8], categorises it with CategorizeError, gives CheckCancel a chance to abort, then dispatches on the category and on the top byte of the error word. */ +static void cbForStateGettingError(u32 intType) +{ + u32 error; + u32 status; + u32 errorCategory; + u32 resume; + + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 2) { + executing->state = -1; + stateError(0x1234567); + return; + } + + error = __DIRegs[8]; + status = error & 0xff000000; + + errorCategory = CategorizeError(error); + + if (errorCategory == 1) { + executing->state = -1; + stateError(error); + return; + } + + if ((errorCategory == 2) || (errorCategory == 3)) { + resume = 0; + } + else { + if (status == 0x01000000) + resume = 4; + else if (status == 0x02000000) + resume = 6; + else if (status == 0x03000000) + resume = 3; + else + resume = 5; + } + + if (CheckCancel(resume)) + return; + + if (errorCategory == 2) { + __DVDStoreErrorCode(error); + stateGoToRetry(); + return; + } + + if (errorCategory == 3) { + if ((error & 0x00ffffff) == 0x00031100) { + DVDLowSeek(executing->offset, cbForUnrecoveredError); + } + else { + LastState(executing); + } + return; + } + + if (status == 0x01000000) { + executing->state = 5; + stateMotorStopped(); + return; + } + else if (status == 0x02000000) { + executing->state = 
3; + stateCoverClosed(); + return; + } + else if (status == 0x03000000) { + executing->state = 4; + stateMotorStopped(); + return; + } + else { + executing->state = -1; + stateError(0x1234567); + return; + } +} + /* Callback for the DVDLowSeek issued on error 0x031100: bit 0 set goes to stateGoToRetry; otherwise the error is requested again with cbForUnrecoveredErrorRetry. */ +static void cbForUnrecoveredError(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 1) { + stateGoToRetry(); + return; + } + + DVDLowRequestError(cbForUnrecoveredErrorRetry); +} + /* Marks the executing block as state -1, stores either 0x1234567 (bit 1 of intType set) or the value of __DIRegs[8], and stops the motor with cbForStateError. */ +static void cbForUnrecoveredErrorRetry(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + executing->state = -1; + + if (intType & 2) { + __DVDStoreErrorCode(0x1234567); + DVDLowStopMotor(cbForStateError); + return; + } + + __DVDStoreErrorCode(__DIRegs[8]); + DVDLowStopMotor(cbForStateError); +} + /* Stops the motor; cbForStateGoToRetry runs on completion. */ +static void stateGoToRetry() +{ + DVDLowStopMotor(cbForStateGoToRetry); +} + /* After the motor stop: clears NumInternalRetry, sets ResetRequired for commands 4, 5, 13 and 15, and unless CheckCancel(2) fires moves the block to state 11 and calls stateMotorStopped. */ +static void cbForStateGoToRetry(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 2) { + executing->state = -1; + stateError(0x1234567); + return; + } + + NumInternalRetry = 0; + + if ((CurrCommand == 4) || (CurrCommand == 5) || (CurrCommand == 13) || (CurrCommand == 15)) { + ResetRequired = TRUE; + } + + if (!CheckCancel(2)) { + executing->state = 11; + stateMotorStopped(); + } +} + /* Compares the ID in tmpBuffer with executing->id (command 3) or with currID (all other commands); a mismatch stops the motor with cbForStateCheckID1, a match continues with stateCheckID2 or stateCheckID3 respectively. */ +static void stateCheckID() +{ + switch (CurrCommand) { + case 3: + if (memcmp(tmpBuffer, executing->id, 0x1C) != FALSE) { + DVDLowStopMotor(cbForStateCheckID1); + } + else { + memcpy(currID, tmpBuffer, sizeof(DVDDiskID)); + executing->state = 1; + DCInvalidateRange(tmpBuffer, sizeof(DVDBB2)); + LastState = stateCheckID2; + stateCheckID2(executing); + } + break; + default: + if (memcmp(tmpBuffer, currID, sizeof(DVDDiskID)) != 0) { + DVDLowStopMotor(cbForStateCheckID1); + } + else { + LastState = stateCheckID3; + stateCheckID3(executing); + } + break; + } +} + /* Issues DVDLowAudioBufferConfig with currID->streaming and the constant 10. */ +static void stateCheckID3() +{ + DVDLowAudioBufferConfig(currID->streaming, 10, 
cbForStateCheckID3); +} + /* Same DVDLowAudioBufferConfig call as stateCheckID3, but completion goes to cbForStateCheckID2a. */ +static void stateCheckID2a() +{ + DVDLowAudioBufferConfig(currID->streaming, 10, cbForStateCheckID2a); +} + /* Bit 0 of intType set: clears NumInternalRetry and continues with stateCheckID2; otherwise requests the drive error. */ +static void cbForStateCheckID2a(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 1) { + NumInternalRetry = 0; + stateCheckID2(executing); + return; + } + + DVDLowRequestError(cbForStateGettingError); +} + /* Reads sizeof(DVDBB2) bytes (rounded by OSRoundUp32B) from disc offset 0x420 into tmpBuffer; the block argument is not used in the body. */ +static void stateCheckID2(DVDCommandBlock *block) +{ + DVDLowRead(tmpBuffer, OSRoundUp32B(sizeof(DVDBB2)), 0x420, cbForStateCheckID2); +} + /* Callback for the motor stop issued on an ID mismatch: unless CheckCancel(1) fires, moves the block to state 6 and calls stateMotorStopped. */ +static void cbForStateCheckID1(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 2) { + executing->state = -1; + stateError(0x1234567); + return; + } + + NumInternalRetry = 0; + + if (!CheckCancel(1)) { + executing->state = 6; + stateMotorStopped(); + } +} + /* Callback for the stateCheckID2 read: success (bit 0) continues with stateReadingFST, failure with stateGettingError. */ +static void cbForStateCheckID2(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 1) { + + NumInternalRetry = 0; + + stateReadingFST(); + } + else { + + stateGettingError(); + } +} + /* Callback for stateCheckID3: on success (bit 0), unless CheckCancel(0) fires, sets the block to state 1 and re-enters stateBusy; on failure goes to stateGettingError. */ +static void cbForStateCheckID3(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 1) { + + NumInternalRetry = 0; + + if (!CheckCancel(0)) { + executing->state = 1; + stateBusy(executing); + } + } + else { + stateGettingError(); + } +} + /* Alarm callback armed by stateCoverClosed: resets the drive, invalidates the disk-ID sized prefix of tmpBuffer and starts stateCoverClosed_CMD. */ +static void AlarmHandler(OSAlarm *alarm, OSContext *context) +{ + DVDReset(); + DCInvalidateRange(tmpBuffer, sizeof(DVDDiskID)); + LastState = stateCoverClosed_CMD; + stateCoverClosed_CMD(executing); +} + /* For commands 4, 5, 13 and 15 the waiting queue is cleared and the block's callback receives -4; every other command resets the drive and arms ResetAlarm. */ +static void stateCoverClosed() +{ + DVDCommandBlock *finished; + + switch (CurrCommand) { + case 5: + case 4: + case 13: + case 15: + __DVDClearWaitingQueue(); + finished = executing; + executing = &DummyCommandBlock; + if (finished->callback) { + (finished->callback)(-4, finished); + } + stateReady(); + break; + + default: + DVDReset(); + OSCreateAlarm(&ResetAlarm); + 
OSSetAlarm(&ResetAlarm, OSMillisecondsToTicks(1150), AlarmHandler); + break; + } +} + /* Reads the disk ID into tmpBuffer; the block argument is not used in the body. */ +static void stateCoverClosed_CMD(DVDCommandBlock *block) +{ + DVDLowReadDiskID(tmpBuffer, cbForStateCoverClosed); +} + /* Callback for the disk-ID read: success (bit 0) continues with stateCheckID, failure with stateGettingError. */ +static void cbForStateCoverClosed(u32 intType) +{ + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if (intType & 1) { + NumInternalRetry = 0; + stateCheckID(); + } + else { + stateGettingError(); + } +} + /* Waits for the cover to close; cbForStateMotorStopped runs when that happens. */ +static void stateMotorStopped(void) +{ + DVDLowWaitCoverClose(cbForStateMotorStopped); +} + /* Writes 0 to __DIRegs[1], sets the block to state 3 and calls stateCoverClosed; intType is not examined. */ +static void cbForStateMotorStopped(u32 intType) +{ + __DIRegs[1] = 0; + executing->state = 3; + stateCoverClosed(); +} + /* Picks the next queued command block. Returns with executing cleared when the queue is empty or PauseFlag is set; fails the block with -1 when FatalErrorFlag is set; otherwise resumes at the step recorded in ResumeFromHere or starts stateBusy. */ +void stateReady() +{ + DVDCommandBlock *finished; + + if (!__DVDCheckWaitingQueue()) { + executing = (DVDCommandBlock *)nullptr; + return; + } + + if (PauseFlag) { + PausingFlag = TRUE; + executing = (DVDCommandBlock *)nullptr; + return; + } + + executing = __DVDPopWaitingQueue(); + + if (FatalErrorFlag) { + executing->state = -1; + finished = executing; + executing = &DummyCommandBlock; + if (finished->callback) { + (finished->callback)(-1, finished); + } + stateReady(); + return; + } + + CurrCommand = executing->command; + + if (ResumeFromHere) { + switch (ResumeFromHere) { + case 1: + executing->state = 6; + stateMotorStopped(); + break; + + case 2: + executing->state = 11; + stateMotorStopped(); + break; + + case 3: + executing->state = 4; + stateMotorStopped(); + break; + + case 7: + executing->state = 7; + stateMotorStopped(); + break; + + case 4: + executing->state = 5; + stateMotorStopped(); + break; + + case 6: + executing->state = 3; + stateCoverClosed(); + break; + + case 5: + executing->state = -1; + stateError(CancelLastError); + break; + } + + ResumeFromHere = 0; + } + else { + executing->state = 1; + stateBusy(executing); + } +} + +#define MIN(a, b) (((a) > (b)) ? 
(b) : (a)) +static void stateBusy(DVDCommandBlock *block) +{ /* Records itself in LastState and issues the low-level call selected by block->command; every call passes cbForStateBusy as its completion callback. Reads (commands 1 and 4) are capped at 0x80000 bytes per call via MIN. */ + DVDCommandBlock *finished; + LastState = stateBusy; + switch (block->command) { + case 5: + __DIRegs[1] = __DIRegs[1]; + block->currTransferSize = sizeof(DVDDiskID); + DVDLowReadDiskID(block->addr, cbForStateBusy); + break; + case 1: + case 4: + __DIRegs[1] = __DIRegs[1]; + block->currTransferSize = MIN(block->length - block->transferredSize, 0x80000); + DVDLowRead((void *)((u8 *)block->addr + block->transferredSize), block->currTransferSize, block->offset + block->transferredSize, + cbForStateBusy); + break; + case 2: + __DIRegs[1] = __DIRegs[1]; + DVDLowSeek(block->offset, cbForStateBusy); + break; + case 3: + DVDLowStopMotor(cbForStateBusy); + break; + case 15: + DVDLowStopMotor(cbForStateBusy); + break; + case 6: + __DIRegs[1] = __DIRegs[1]; + if (AutoFinishing) { + executing->currTransferSize = 0; + DVDLowRequestAudioStatus(0, cbForStateBusy); + } + else { + executing->currTransferSize = 1; + DVDLowAudioStream(0, block->length, block->offset, cbForStateBusy); + } + break; + case 7: + __DIRegs[1] = __DIRegs[1]; + DVDLowAudioStream(0x10000, 0, 0, cbForStateBusy); + break; + case 8: + __DIRegs[1] = __DIRegs[1]; + AutoFinishing = TRUE; + DVDLowAudioStream(0, 0, 0, cbForStateBusy); + break; + case 9: + __DIRegs[1] = __DIRegs[1]; + DVDLowRequestAudioStatus(0, cbForStateBusy); + break; + case 10: + __DIRegs[1] = __DIRegs[1]; + DVDLowRequestAudioStatus(0x10000, cbForStateBusy); + break; + case 11: + __DIRegs[1] = __DIRegs[1]; + DVDLowRequestAudioStatus(0x20000, cbForStateBusy); + break; + case 12: + __DIRegs[1] = __DIRegs[1]; + DVDLowRequestAudioStatus(0x30000, cbForStateBusy); + break; + case 13: + __DIRegs[1] = __DIRegs[1]; + DVDLowAudioBufferConfig(block->offset, block->length, cbForStateBusy); + break; + case 14: + __DIRegs[1] = __DIRegs[1]; + block->currTransferSize = sizeof(DVDDriveInfo); + DVDLowInquiry(block->addr, cbForStateBusy); + break; + } +} + +// removing these matches DVDCancelAsync and 
DVDCheckDisk +static u32 ImmCommand[] = { 0xffffffff, 0xffffffff, 0xffffffff }; +static u32 DmaCommand[] = { 0xffffffff }; + /* TRUE for commands 9, 10, 11 and 12, or for any value listed in ImmCommand. */ +inline static BOOL IsImmCommandWithResult(u32 command) +{ + u32 i; + + if (command == 9 || command == 10 || command == 11 || command == 12) { + return TRUE; + } + + for (i = 0; i < sizeof(ImmCommand) / sizeof(ImmCommand[0]); i++) { + if (command == ImmCommand[i]) + return TRUE; + } + + return FALSE; +} + /* TRUE for commands 1, 4, 5 and 14, or for any value listed in DmaCommand. */ +inline static BOOL IsDmaCommand(u32 command) +{ + u32 i; + + if (command == 1 || command == 4 || command == 5 || command == 14) + return TRUE; + + for (i = 0; i < sizeof(DmaCommand) / sizeof(DmaCommand[0]); i++) { + if (command == DmaCommand[i]) + return TRUE; + } + + return FALSE; +} + /* Completion callback for every call issued by stateBusy. Handles timeout (intType 16), the motor-stop commands 3 and 15, transfer accounting for commands 1, 4, 5 and 14, the break bit (intType & 8), and then success (bit 0) or failure per command. */ +void cbForStateBusy(u32 intType) +{ + DVDCommandBlock *finished; + + if (intType == 16) { + executing->state = -1; + stateTimeout(); + return; + } + + if ((CurrCommand == 3) || (CurrCommand == 15)) { + if (intType & 2) { + executing->state = -1; + stateError(0x1234567); + return; + } + + NumInternalRetry = 0; + + if (CurrCommand == 15) { + ResetRequired = TRUE; + } + + if (CheckCancel(7)) { + return; + } + + executing->state = 7; + stateMotorStopped(); + return; + } + + if ((CurrCommand == 1) || (CurrCommand == 4) || (CurrCommand == 5) || (CurrCommand == 14)) { + executing->transferredSize += executing->currTransferSize - __DIRegs[6]; + } + + if (intType & 8) { + Canceling = FALSE; + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 10; + if (finished->callback) { + (*finished->callback)(-3, finished); + } + if (CancelCallback) { + (CancelCallback)(0, finished); + } + stateReady(); + + return; + } + + if (intType & 1) { + NumInternalRetry = 0; + + if (CheckCancel(0)) + return; + + if ((CurrCommand == 1) || (CurrCommand == 4) || (CurrCommand == 5) || (CurrCommand == 14)) { + if (executing->transferredSize != executing->length) { + stateBusy(executing); + return; + } + + finished = executing; + executing = &DummyCommandBlock; + + 
finished->state = 0; + if (finished->callback) { + (finished->callback)((s32)finished->transferredSize, finished); + } + stateReady(); + } + else if ((CurrCommand == 9) || (CurrCommand == 10) || (CurrCommand == 11) || (CurrCommand == 12)) { + s32 result; + + if ((CurrCommand == 11) || (CurrCommand == 10)) { + result = (s32)(__DIRegs[DI_MM_BUF] << 2); + } + else { + result = (s32)__DIRegs[DI_MM_BUF]; + } + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 0; + if (finished->callback) { + (finished->callback)(result, finished); + } + stateReady(); + } + else if (CurrCommand == 6) { + if (executing->currTransferSize == 0) { + if (__DIRegs[DI_MM_BUF] & 1) { + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 9; + if (finished->callback) { + (finished->callback)(-2, finished); + } + stateReady(); + } + else { + AutoFinishing = FALSE; + executing->currTransferSize = 1; + DVDLowAudioStream(0, executing->length, executing->offset, cbForStateBusy); + } + } + else { + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 0; + if (finished->callback) { + (finished->callback)(0, finished); + } + stateReady(); + } + } + else { + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 0; + if (finished->callback) { + (finished->callback)(0, finished); + } + stateReady(); + } + } + else { + if (CurrCommand == 14) { + executing->state = -1; + stateError(0x01234567); + return; + } + + if ((CurrCommand == 1 || CurrCommand == 4 || CurrCommand == 5 || CurrCommand == 14) && (executing->transferredSize == executing->length)) { + + if (CheckCancel(0)) { + return; + } + finished = executing; + executing = &DummyCommandBlock; + + finished->state = 0; + if (finished->callback) { + (finished->callback)((s32)finished->transferredSize, finished); + } + stateReady(); + return; + } + + stateGettingError(); + } +} + /* Queues a prepared command block at priority prio and returns the result of __DVDPushWaitingQueue. */ +static BOOL issueCommand(s32 prio, DVDCommandBlock *block) +{ + BOOL level; + 
BOOL result; + + if (autoInvalidation && (block->command == 1 || block->command == 4 || block->command == 5 || block->command == 14)) { + DCInvalidateRange(block->addr, block->length); + } + + level = OSDisableInterrupts(); + + block->state = 2; + result = __DVDPushWaitingQueue(prio, block); + + if ((executing == (DVDCommandBlock *)NULL) && (PauseFlag == FALSE)) { + stateReady(); + } + + OSRestoreInterrupts(level); + + return result; +} + /* Fills block as command 1 (read of length bytes at offset into addr) and queues it at the caller's priority. */ +BOOL DVDReadAbsAsyncPrio(DVDCommandBlock *block, void *addr, s32 length, s32 offset, DVDCBCallback callback, s32 prio) +{ + BOOL idle; + block->command = 1; + block->addr = addr; + block->length = length; + block->offset = offset; + block->transferredSize = 0; + block->callback = callback; + + idle = issueCommand(prio, block); + return idle; +} /* Same field setup as DVDReadAbsAsyncPrio but with command 4 and a fixed priority of 2. */ +BOOL DVDReadAbsAsyncForBS(DVDCommandBlock *block, void *addr, s32 length, s32 offset, DVDCBCallback callback) +{ + BOOL idle; + block->command = 4; + block->addr = addr; + block->length = length; + block->offset = offset; + block->transferredSize = 0; + block->callback = callback; + + idle = issueCommand(2, block); + return idle; +} /* Queues command 5 at priority 2 with diskID as destination and sizeof(DVDDiskID) as length. */ +BOOL DVDReadDiskID(DVDCommandBlock *block, DVDDiskID *diskID, DVDCBCallback callback) +{ + BOOL idle; + block->command = 5; + block->addr = diskID; + block->length = sizeof(DVDDiskID); + ; + block->offset = 0; + block->transferredSize = 0; + block->callback = callback; + + idle = issueCommand(2, block); + return idle; +} /* Queues command 6 at priority 1 with the given length and offset. */ +BOOL DVDPrepareStreamAbsAsync(DVDCommandBlock *block, u32 length, u32 offset, DVDCBCallback callback) +{ + BOOL idle; + block->command = 6; + block->length = length; + block->offset = offset; + block->callback = callback; + + idle = issueCommand(1, block); + return idle; +} /* Queues command 7 at priority 1. */ +BOOL DVDCancelStreamAsync(DVDCommandBlock *block, DVDCBCallback callback) +{ + BOOL idle; + block->command = 7; + block->callback = callback; + idle = issueCommand(1, block); + return idle; +} /* Blocking wrapper around DVDCancelStreamAsync: sleeps on __DVDThreadQueue until the block reaches state 0, -1 or 10. */ +s32 DVDCancelStream(DVDCommandBlock *block) +{ + BOOL result; + s32 state; + 
BOOL enabled; + s32 retVal; + + result = DVDCancelStreamAsync(block, cbForCancelStreamSync); + + if (result == FALSE) { + return -1; + } + + enabled = OSDisableInterrupts(); + + while (TRUE) { + state = ((volatile DVDCommandBlock *)block)->state; + + if (state == 0 || state == -1 || state == 10) { + retVal = (s32)block->transferredSize; + break; + } + + OSSleepThread(&__DVDThreadQueue); + } + + OSRestoreInterrupts(enabled); + return retVal; +} /* Stores the result in block->transferredSize, where DVDCancelStream reads it back, and wakes threads sleeping on __DVDThreadQueue. */ +static void cbForCancelStreamSync(s32 result, DVDCommandBlock *block) +{ + block->transferredSize = (u32)result; + OSWakeupThread(&__DVDThreadQueue); +} /* Queues command 8 at priority 1. */ +BOOL DVDStopStreamAtEndAsync(DVDCommandBlock *block, DVDCBCallback callback) +{ + BOOL idle; + + block->command = 8; + block->callback = callback; + + idle = issueCommand(1, block); + + return idle; +} /* Queues command 9 at priority 1. */ +BOOL DVDGetStreamErrorStatusAsync(DVDCommandBlock *block, DVDCBCallback callback) +{ + BOOL idle; + + block->command = 9; + block->callback = callback; + + idle = issueCommand(1, block); + + return idle; +} /* Queues command 10 at priority 1. */ +BOOL DVDGetStreamPlayAddrAsync(DVDCommandBlock *block, DVDCBCallback callback) +{ + BOOL idle; + + block->command = 10; + block->callback = callback; + + idle = issueCommand(1, block); + + return idle; +} /* Queues command 14 at priority 2 with info as destination and sizeof(DVDDriveInfo) as length. */ +BOOL DVDInquiryAsync(DVDCommandBlock *block, DVDDriveInfo *info, DVDCBCallback callback) +{ + BOOL idle; + + block->command = 14; + block->addr = (void *)info; + block->length = sizeof(DVDDriveInfo); + block->transferredSize = 0; + block->callback = callback; + + idle = issueCommand(2, block); + + return idle; +} + /* Calls DVDLowReset, writes 0x2a to __DIRegs[0], writes __DIRegs[1] back to itself, and clears ResetRequired and ResumeFromHere. */ +void DVDReset(void) +{ + DVDLowReset(); + __DIRegs[0] = 0x2a; + __DIRegs[1] = __DIRegs[1]; + ResetRequired = FALSE; + ResumeFromHere = 0; +} + /* Returns block->state read with interrupts disabled, except that state 3 is reported as 1. */ +s32 DVDGetCommandBlockStatus(const DVDCommandBlock *block) +{ + BOOL enabled; + s32 retVal; + + enabled = OSDisableInterrupts(); + + if (block->state == 3) { + retVal = 1; + } + else { + retVal = block->state; + } + + OSRestoreInterrupts(enabled); + + return retVal; +} + /* Drive-wide status: -1 on fatal error, 8 while pausing, 0 when nothing real is executing, otherwise the status of the executing block. */ +s32 DVDGetDriveStatus() +{ + BOOL enabled; + 
s32 retVal; + + enabled = OSDisableInterrupts(); + + if (FatalErrorFlag) { + retVal = -1; + } + else if (PausingFlag) { + retVal = 8; + } + else { + if (executing == (DVDCommandBlock *)NULL) { + retVal = 0; + } + else if (executing == &DummyCommandBlock) { + retVal = 0; + } + else { + retVal = DVDGetCommandBlockStatus(executing); + } + } + + OSRestoreInterrupts(enabled); + + return retVal; +} + /* Replaces the autoInvalidation flag and returns its previous value. */ +BOOL DVDSetAutoInvalidation(BOOL autoInval) +{ + BOOL prev; + prev = autoInvalidation; + autoInvalidation = autoInval; + return prev; +} + /* Sets PauseFlag; PausingFlag is also set at once when no block is executing. */ +inline void DVDPause(void) +{ + BOOL level; + level = OSDisableInterrupts(); + PauseFlag = TRUE; + if (executing == (DVDCommandBlock *)NULL) { + PausingFlag = TRUE; + } + OSRestoreInterrupts(level); +} + /* Clears PauseFlag and, if PausingFlag was set, clears it and restarts the queue through stateReady. */ +inline void DVDResume(void) +{ + BOOL level; + level = OSDisableInterrupts(); + PauseFlag = FALSE; + if (PausingFlag) { + PausingFlag = FALSE; + stateReady(); + } + OSRestoreInterrupts(level); +} + /* Cancels block according to its current state. Returns FALSE when another cancel is already in progress or when the cleared low-level callback was not cbForStateMotorStopped; TRUE otherwise. */ +BOOL DVDCancelAsync(DVDCommandBlock *block, DVDCBCallback callback) +{ + BOOL enabled; + DVDLowCallback old; + + enabled = OSDisableInterrupts(); + + switch (block->state) { + case -1: + case 0: + case 10: + if (callback) + (*callback)(0, block); + break; + + case 1: + if (Canceling) { + OSRestoreInterrupts(enabled); + return FALSE; + } + + Canceling = TRUE; + CancelCallback = callback; + if (block->command == 4 || block->command == 1) { + DVDLowBreak(); + } + break; + + case 2: + __DVDDequeueWaitingQueue(block); + block->state = 10; + if (block->callback) + (block->callback)(-3, block); + if (callback) + (*callback)(0, block); + break; + + case 3: + switch (block->command) { + case 5: + case 4: + case 13: + case 15: + if (callback) + (*callback)(0, block); + break; + + default: + if (Canceling) { + OSRestoreInterrupts(enabled); + return FALSE; + } + Canceling = TRUE; + CancelCallback = callback; + break; + } + break; + + case 4: + case 5: + case 6: + case 7: + case 11: + old = DVDLowClearCallback(); + if (old != cbForStateMotorStopped) { + 
OSRestoreInterrupts(enabled); + return FALSE; + } + + if (block->state == 4) + ResumeFromHere = 3; + if (block->state == 5) + ResumeFromHere = 4; + if (block->state == 6) + ResumeFromHere = 1; + if (block->state == 11) + ResumeFromHere = 2; + if (block->state == 7) + ResumeFromHere = 7; + + block->state = 10; + if (block->callback) { + (block->callback)(-3, block); + } + if (callback) { + (callback)(0, block); + } + stateReady(); + break; + } + + OSRestoreInterrupts(enabled); + return TRUE; +} + +s32 DVDCancel(DVDCommandBlock *block) +{ + BOOL result; + s32 state; + u32 command; + BOOL enabled; + + result = DVDCancelAsync(block, cbForCancelSync); + + if (result == FALSE) { + return -1; + } + + enabled = OSDisableInterrupts(); + + for (;;) { + state = ((volatile DVDCommandBlock *)block)->state; + + if ((state == 0) || (state == -1) || (state == 10)) { + break; + } + + if (state == 3) { + command = ((volatile DVDCommandBlock *)block)->command; + + if ((command == 4) || (command == 5) || (command == 13) || (command == 15)) { + break; + } + } + + OSSleepThread(&__DVDThreadQueue); + } + + OSRestoreInterrupts(enabled); + return 0; +} + +static void cbForCancelSync(s32 result, DVDCommandBlock *block) +{ + OSWakeupThread(&__DVDThreadQueue); +} + +inline BOOL DVDCancelAllAsync(DVDCBCallback callback) +{ + BOOL enabled; + DVDCommandBlock *p; + BOOL retVal; + + enabled = OSDisableInterrupts(); + DVDPause(); + + while ((p = __DVDPopWaitingQueue()) != 0) { + DVDCancelAsync(p, NULL); + } + + if (executing) + retVal = DVDCancelAsync(executing, callback); + else { + retVal = TRUE; + if (callback) + (*callback)(0, NULL); + } + + DVDResume(); + OSRestoreInterrupts(enabled); + return retVal; +} + +s32 DVDCancelAll(void) +{ + BOOL result; + BOOL enabled; + + enabled = OSDisableInterrupts(); + CancelAllSyncComplete = FALSE; + + result = DVDCancelAllAsync(cbForCancelAllSync); + + if (result == FALSE) { + OSRestoreInterrupts(enabled); + return -1; + } + + for (;;) { + if 
(CancelAllSyncComplete) + break; + + OSSleepThread(&__DVDThreadQueue); + } + + OSRestoreInterrupts(enabled); + return 0; +} + +static void cbForCancelAllSync(s32 result, DVDCommandBlock *block) +{ + CancelAllSyncComplete = TRUE; + OSWakeupThread(&__DVDThreadQueue); +} + +DVDDiskID *DVDGetCurrentDiskID(void) +{ + return (DVDDiskID *)OSPhysicalToCached(0); +} +BOOL DVDCheckDisk(void) +{ + BOOL enabled; + s32 retVal; + s32 state; + u32 coverReg; + + enabled = OSDisableInterrupts(); + + if (FatalErrorFlag) { + state = -1; + } + else if (PausingFlag) { + state = 8; + } + else { + if (executing == (DVDCommandBlock *)NULL) { + state = 0; + } + else if (executing == &DummyCommandBlock) { + state = 0; + } + else { + state = executing->state; + } + } + + switch (state) { + case 1: + case 9: + case 10: + case 2: + retVal = TRUE; + break; + + case -1: + case 11: + case 7: + case 3: + case 4: + case 5: + case 6: + retVal = FALSE; + break; + + case 0: + case 8: + coverReg = __DIRegs[1]; + if (((coverReg >> 2) & 1) || (coverReg & 1)) { + retVal = FALSE; + } + else { + retVal = TRUE; + } + } + + OSRestoreInterrupts(enabled); + + return retVal; +} + +void __DVDPrepareResetAsync(DVDCBCallback callback) +{ + BOOL enabled; + + enabled = OSDisableInterrupts(); + + __DVDClearWaitingQueue(); + + if (Canceling) { + CancelCallback = callback; + } + else { + if (executing) { + executing->callback = NULL; + } + + DVDCancelAllAsync(callback); + } + + OSRestoreInterrupts(enabled); +} diff --git a/src/dolphin/mtx/mtx.c b/src/dolphin/mtx/mtx.c new file mode 100644 index 00000000..a8f2bb06 --- /dev/null +++ b/src/dolphin/mtx/mtx.c @@ -0,0 +1,1315 @@ +#include "dolphin/mtx.h" + +static f32 Unit01[] = { 0.0f, 1.0f }; + +extern f32 sinf(f32); + +void C_MTXIdentity(Mtx mtx) +{ + mtx[0][0] = 1.0f; + mtx[0][1] = 0.0f; + mtx[0][2] = 0.0f; + mtx[1][0] = 0.0f; + mtx[1][1] = 1.0f; + mtx[1][2] = 0.0f; + mtx[2][0] = 0.0f; + mtx[2][1] = 0.0f; + mtx[2][2] = 1.0f; +} + +#ifdef GEKKO +void PSMTXIdentity(register 
Mtx m) +{ + register f32 zero_c = 0.0f; + register f32 one_c = 1.0f; + register f32 c_01; + register f32 c_10; + // clang-format off + asm { + psq_st zero_c, 8(m), 0, 0 + ps_merge01 c_01, zero_c, one_c + psq_st zero_c, 24(m), 0, 0 + ps_merge10 c_10, one_c, zero_c + psq_st zero_c, 32(m), 0, 0 + psq_st c_01, 16(m), 0, 0 + psq_st c_10, 0(m), 0, 0 + psq_st c_10, 40(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXCopy(const Mtx src, Mtx dst) +{ + + if (src == dst) { + return; + } + + dst[0][0] = src[0][0]; + dst[0][1] = src[0][1]; + dst[0][2] = src[0][2]; + dst[0][3] = src[0][3]; + + dst[1][0] = src[1][0]; + dst[1][1] = src[1][1]; + dst[1][2] = src[1][2]; + dst[1][3] = src[1][3]; + + dst[2][0] = src[2][0]; + dst[2][1] = src[2][1]; + dst[2][2] = src[2][2]; + dst[2][3] = src[2][3]; +} + +#ifdef GEKKO +asm void PSMTXCopy(const register Mtx src, register Mtx dst) +{ + // clang-format off + nofralloc + + psq_l fp0, 0(src), 0, 0 + psq_st fp0, 0(dst), 0, 0 + psq_l fp1, 8(src), 0, 0 + psq_st fp1, 8(dst), 0, 0 + psq_l fp2, 16(src), 0, 0 + psq_st fp2, 16(dst), 0, 0 + psq_l fp3, 24(src), 0, 0 + psq_st fp3, 24(dst), 0, 0 + psq_l fp4, 32(src), 0, 0 + psq_st fp4, 32(dst), 0, 0 + psq_l fp5, 40(src), 0, 0 + psq_st fp5, 40(dst), 0, 0 + + blr + // clang-format on +} +#endif + +void C_MTXConcat(const Mtx a, const Mtx b, Mtx ab) +{ + Mtx mTmp; + MtxPtr m; + + if ((ab == a) || (ab == b)) { + m = mTmp; + } + + else { + m = ab; + } + + m[0][0] = a[0][0] * b[0][0] + a[0][1] * b[1][0] + a[0][2] * b[2][0]; + m[0][1] = a[0][0] * b[0][1] + a[0][1] * b[1][1] + a[0][2] * b[2][1]; + m[0][2] = a[0][0] * b[0][2] + a[0][1] * b[1][2] + a[0][2] * b[2][2]; + m[0][3] = a[0][0] * b[0][3] + a[0][1] * b[1][3] + a[0][2] * b[2][3] + a[0][3]; + + m[1][0] = a[1][0] * b[0][0] + a[1][1] * b[1][0] + a[1][2] * b[2][0]; + m[1][1] = a[1][0] * b[0][1] + a[1][1] * b[1][1] + a[1][2] * b[2][1]; + m[1][2] = a[1][0] * b[0][2] + a[1][1] * b[1][2] + a[1][2] * b[2][2]; + m[1][3] = a[1][0] * b[0][3] + a[1][1] * b[1][3] 
+ a[1][2] * b[2][3] + a[1][3]; + + m[2][0] = a[2][0] * b[0][0] + a[2][1] * b[1][0] + a[2][2] * b[2][0]; + m[2][1] = a[2][0] * b[0][1] + a[2][1] * b[1][1] + a[2][2] * b[2][1]; + m[2][2] = a[2][0] * b[0][2] + a[2][1] * b[1][2] + a[2][2] * b[2][2]; + m[2][3] = a[2][0] * b[0][3] + a[2][1] * b[1][3] + a[2][2] * b[2][3] + a[2][3]; + + if (m == mTmp) { + C_MTXCopy(mTmp, ab); + } +} + +#ifdef GEKKO +asm void PSMTXConcat(const register Mtx mA, const register Mtx mB, register Mtx mAB) +{ + // clang-format off + nofralloc + +#define FP0 fp0 +#define FP1 fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 +#define FP14 fp14 +#define FP15 fp15 +#define FP31 fp31 + stwu r1, -64(r1); + psq_l FP0, 0(mA), 0, 0; + stfd fp14, 8(r1); + psq_l FP6, 0(mB), 0, 0; + addis r6, 0, Unit01@ha; + psq_l FP7, 8(mB), 0, 0; + stfd fp15, 16(r1) + addi r6, r6, Unit01@l; + stfd fp31, 40(r1) + psq_l FP8, 16(mB), 0, 0 + ps_muls0 FP12, FP6, FP0 + psq_l FP2, 16(mA), 0, 0 + ps_muls0 FP13, FP7, FP0 + psq_l FP31, 0(r6), 0, 0 + ps_muls0 FP14, FP6, FP2 + psq_l FP9, 24(mB), 0, 0 + ps_muls0 FP15, FP7, FP2 + psq_l FP1, 8(mA), 0, 0 + ps_madds1 FP12, FP8, FP0, FP12 + psq_l FP3, 24(mA), 0, 0 + ps_madds1 FP14, FP8, FP2, FP14 + psq_l FP10, 32(mB), 0, 0 + ps_madds1 FP13, FP9, FP0, FP13 + psq_l FP11, 40(mB), 0, 0 + ps_madds1 FP15, FP9, FP2, FP15 + psq_l FP4, 32(mA), 0, 0 + psq_l FP5, 40(mA), 0, 0 + ps_madds0 FP12, FP10, FP1, FP12 + ps_madds0 FP13, FP11, FP1, FP13 + ps_madds0 FP14, FP10, FP3, FP14 + ps_madds0 FP15, FP11, FP3, FP15 + psq_st FP12, 0(mAB), 0, 0 + + ps_muls0 FP2, FP6, FP4 + ps_madds1 FP13, FP31, FP1, FP13 + ps_muls0 FP0, FP7, FP4 + psq_st FP14, 16(mAB), 0, 0 + ps_madds1 FP15, FP31, FP3, FP15 + + psq_st FP13, 8(mAB), 0, 0 + + ps_madds1 FP2, FP8, FP4, FP2 + ps_madds1 FP0, FP9, FP4, FP0 + ps_madds0 FP2, FP10, FP5, FP2 + lfd fp14, 8(r1) + psq_st FP15, 
24(mAB), 0, 0 + ps_madds0 FP0, FP11, FP5, FP0 + psq_st FP2, 32(mAB), 0, 0 + ps_madds1 FP0, FP31, FP5, FP0 + lfd fp15, 16(r1) + psq_st FP0, 40(mAB), 0, 0 + + lfd fp31, 40(r1) + addi r1, r1, 64 + + blr + // clang-format on + +#undef FP0 +#undef FP1 +#undef FP2 +#undef FP3 +#undef FP4 +#undef FP5 +#undef FP6 +#undef FP7 +#undef FP8 +#undef FP9 +#undef FP10 +#undef FP11 +#undef FP12 +#undef FP13 +#undef FP14 +#undef FP15 +#undef FP31 +} +#endif + +void C_MTXConcatArray(const Mtx a, const Mtx *srcBase, Mtx *dstBase, u32 count) +{ + u32 i; + for (i = 0; i < count; i++) { + C_MTXConcat(a, *srcBase, *dstBase); + + srcBase++; + dstBase++; + } +} + +#ifdef GEKKO +#if (defined(__MWERKS__) && defined(_DEBUG)) +#pragma global_optimizer on +#pragma optimization_level 1 +#endif + +void PSMTXConcatArray(const register Mtx a, const register Mtx *srcBase, register Mtx *dstBase, register u32 count) +{ + register f32 va0, va1, va2, va3, va4, va5; + register f32 vb0, vb1, vb2, vb3, vb4, vb5; + register f32 vd0, vd1, vd2, vd3, vd4, vd5; + register f32 u01; + register f32 *u01Ptr = Unit01; + + // clang-format off + asm + { + psq_l va0, 0(a), 0, 0 + psq_l va1, 8(a), 0, 0 + psq_l va2, 16(a), 0, 0 + psq_l va3, 24(a), 0, 0 + subi count, count, 1 + psq_l va4, 32(a), 0, 0 + psq_l va5, 40(a), 0, 0 + mtctr count + psq_l u01, 0(u01Ptr), 0, 0 + + psq_l vb0, 0(srcBase), 0, 0 + psq_l vb2, 16(srcBase), 0, 0 + + ps_muls0 vd0, vb0, va0 + ps_muls0 vd2, vb0, va2 + ps_muls0 vd4, vb0, va4 + + psq_l vb4, 32(srcBase), 0, 0 + + ps_madds1 vd0, vb2, va0, vd0 + ps_madds1 vd2, vb2, va2, vd2 + ps_madds1 vd4, vb2, va4, vd4 + + psq_l vb1, 8(srcBase), 0, 0 + + ps_madds0 vd0, vb4, va1, vd0 + ps_madds0 vd2, vb4, va3, vd2 + ps_madds0 vd4, vb4, va5, vd4 + + psq_l vb3, 24(srcBase), 0, 0 + psq_st vd0, 0(dstBase), 0, 0 + + ps_muls0 vd1, vb1, va0 + ps_muls0 vd3, vb1, va2 + ps_muls0 vd5, vb1, va4 + + psq_l vb5, 40(srcBase), 0, 0 + psq_st vd2, 16(dstBase), 0, 0 + ps_madds1 vd1, vb3, va0, vd1 + ps_madds1 vd3, vb3, va2, vd3 + 
ps_madds1 vd5, vb3, va4, vd5 + +_loop: + addi srcBase, srcBase, sizeof(Mtx) + ps_madds0 vd1, vb5, va1, vd1 + ps_madds0 vd3, vb5, va3, vd3 + ps_madds0 vd5, vb5, va5, vd5 + psq_l vb0, 0(srcBase), 0, 0 + psq_st vd4, 32(dstBase), 0, 0 + ps_madd vd1, u01, va1, vd1 + ps_madd vd3, u01, va3, vd3 + ps_madd vd5, u01, va5, vd5 + psq_l vb2, 16(srcBase), 0, 0 + psq_st vd1, 8(dstBase), 0, 0 + ps_muls0 vd0, vb0, va0 + ps_muls0 vd2, vb0, va2 + ps_muls0 vd4, vb0, va4 + psq_l vb4, 32(srcBase), 0, 0 + psq_st vd3, 24(dstBase), 0, 0 + ps_madds1 vd0, vb2, va0, vd0 + ps_madds1 vd2, vb2, va2, vd2 + ps_madds1 vd4, vb2, va4, vd4 + psq_l vb1, 8(srcBase), 0, 0 + psq_st vd5, 40(dstBase), 0, 0 + addi dstBase, dstBase, sizeof(Mtx) + + ps_madds0 vd0, vb4, va1, vd0 + ps_madds0 vd2, vb4, va3, vd2 + ps_madds0 vd4, vb4, va5, vd4 + psq_l vb3, 24(srcBase), 0, 0 + psq_st vd0, 0(dstBase), 0, 0 + ps_muls0 vd1, vb1, va0 + ps_muls0 vd3, vb1, va2 + ps_muls0 vd5, vb1, va4 + psq_l vb5, 40(srcBase), 0, 0 + psq_st vd2, 16(dstBase), 0, 0 + ps_madds1 vd1, vb3, va0, vd1 + ps_madds1 vd3, vb3, va2, vd3 + ps_madds1 vd5, vb3, va4, vd5 + bdnz _loop + psq_st vd4, 32(dstBase), 0, 0 + ps_madds0 vd1, vb5, va1, vd1 + ps_madds0 vd3, vb5, va3, vd3 + ps_madds0 vd5, vb5, va5, vd5 + ps_madd vd1, u01, va1, vd1 + ps_madd vd3, u01, va3, vd3 + ps_madd vd5, u01, va5, vd5 + psq_st vd1, 8(dstBase), 0, 0 + psq_st vd3, 24(dstBase), 0, 0 + psq_st vd5, 40(dstBase), 0, 0 + } + // clang-format on +} + +#if (defined(__MWERKS__) && defined(_DEBUG)) +#pragma optimization_level 0 +#pragma global_optimizer reset +#endif + +#endif + +void C_MTXTranspose(const Mtx src, Mtx xPose) +{ + Mtx mTmp; + MtxPtr m; + + if (src == xPose) { + m = mTmp; + } + else { + m = xPose; + } + + m[0][0] = src[0][0]; + m[0][1] = src[1][0]; + m[0][2] = src[2][0]; + m[0][3] = 0.0f; + m[1][0] = src[0][1]; + m[1][1] = src[1][1]; + m[1][2] = src[2][1]; + m[1][3] = 0.0f; + m[2][0] = src[0][2]; + m[2][1] = src[1][2]; + m[2][2] = src[2][2]; + m[2][3] = 0.0f; + + if (m == mTmp) { 
+ C_MTXCopy(mTmp, xPose); + } +} + +#ifdef GEKKO +void PSMTXTranspose(const register Mtx src, register Mtx xPose) +{ + register f32 c_zero = 0.0F; + register f32 row0a, row1a, row0b, row1b; + register f32 trns0, trns1, trns2; + // clang-format off + asm + { + psq_l row0a, 0(src), 0, 0 + stfs c_zero, 44(xPose) + psq_l row1a, 16(src), 0, 0 + ps_merge00 trns0, row0a, row1a + psq_l row0b, 8(src), 1, 0 + ps_merge11 trns1, row0a, row1a + psq_l row1b, 24(src), 1, 0 + psq_st trns0, 0(xPose), 0, 0 + psq_l row0a, 32(src), 0, 0 + ps_merge00 trns2, row0b, row1b + psq_st trns1, 16(xPose), 0, 0 + ps_merge00 trns0, row0a, c_zero + psq_st trns2, 32(xPose), 0, 0 + ps_merge10 trns1, row0a, c_zero + psq_st trns0, 8(xPose), 0, 0 + lfs row0b, 40(src) + psq_st trns1, 24(xPose), 0, 0 + stfs row0b, 40(xPose) + } + // clang-format on +} +#endif + +u32 C_MTXInverse(const Mtx src, Mtx inv) +{ + Mtx mTmp; + MtxPtr m; + f32 det; + + if (src == inv) { + m = mTmp; + } + else { + m = inv; + } + + det = src[0][0] * src[1][1] * src[2][2] + src[0][1] * src[1][2] * src[2][0] + src[0][2] * src[1][0] * src[2][1] + - src[2][0] * src[1][1] * src[0][2] - src[1][0] * src[0][1] * src[2][2] - src[0][0] * src[2][1] * src[1][2]; + + if (det == 0.0f) { + return 0; + } + + det = 1.0f / det; + + m[0][0] = (src[1][1] * src[2][2] - src[2][1] * src[1][2]) * det; + m[0][1] = -(src[0][1] * src[2][2] - src[2][1] * src[0][2]) * det; + m[0][2] = (src[0][1] * src[1][2] - src[1][1] * src[0][2]) * det; + + m[1][0] = -(src[1][0] * src[2][2] - src[2][0] * src[1][2]) * det; + m[1][1] = (src[0][0] * src[2][2] - src[2][0] * src[0][2]) * det; + m[1][2] = -(src[0][0] * src[1][2] - src[1][0] * src[0][2]) * det; + + m[2][0] = (src[1][0] * src[2][1] - src[2][0] * src[1][1]) * det; + m[2][1] = -(src[0][0] * src[2][1] - src[2][0] * src[0][1]) * det; + m[2][2] = (src[0][0] * src[1][1] - src[1][0] * src[0][1]) * det; + + m[0][3] = -m[0][0] * src[0][3] - m[0][1] * src[1][3] - m[0][2] * src[2][3]; + m[1][3] = -m[1][0] * src[0][3] - m[1][1] 
* src[1][3] - m[1][2] * src[2][3]; + m[2][3] = -m[2][0] * src[0][3] - m[2][1] * src[1][3] - m[2][2] * src[2][3]; + + if (m == mTmp) { + C_MTXCopy(mTmp, inv); + } + + return 1; +} + +#ifdef GEKKO +asm u32 PSMTXInverse(const register Mtx src, register Mtx inv) { + // clang-format off + nofralloc + + psq_l fp0, 0(src), 1, 0 + psq_l fp1, 4(src), 0, 0 + psq_l fp2, 16(src), 1, 0 + ps_merge10 fp6, fp1, fp0 + psq_l fp3, 20(src), 0, 0 + psq_l fp4, 32(src), 1, 0 + ps_merge10 fp7, fp3, fp2 + psq_l fp5, 36(src), 0, 0 + ps_mul fp11, fp3, fp6 + ps_mul fp13, fp5, fp7 + ps_merge10 fp8, fp5, fp4 + ps_msub fp11, fp1, fp7, fp11 + ps_mul fp12, fp1, fp8 + ps_msub fp13, fp3, fp8, fp13 + ps_mul fp10, fp3, fp4 + ps_msub fp12, fp5, fp6, fp12 + ps_mul fp9, fp0, fp5 + ps_mul fp8, fp1, fp2 + ps_sub fp6, fp6, fp6 + ps_msub fp10, fp2, fp5, fp10 + ps_mul fp7, fp0, fp13 + ps_msub fp9, fp1, fp4, fp9 + ps_madd fp7, fp2, fp12, fp7 + ps_msub fp8, fp0, fp3, fp8 + ps_madd fp7, fp4, fp11, fp7 + ps_cmpo0 cr0, fp7, fp6 + bne _regular + addi r3, 0, 0 + blr + +_regular: + fres fp0, fp7 + ps_add fp6, fp0, fp0 + ps_mul fp5, fp0, fp0 + ps_nmsub fp0, fp7, fp5, fp6 + lfs fp1, 12(src) + ps_muls0 fp13, fp13, fp0 + lfs fp2, 28(src) + ps_muls0 fp12, fp12, fp0 + lfs fp3, 44(src) + ps_muls0 fp11, fp11, fp0 + ps_merge00 fp5, fp13, fp12 + ps_muls0 fp10, fp10, fp0 + ps_merge11 fp4, fp13, fp12 + ps_muls0 fp9, fp9, fp0 + psq_st fp5, 0(inv), 0, 0 + ps_mul fp6, fp13, fp1 + psq_st fp4, 16(inv), 0, 0 + ps_muls0 fp8, fp8, fp0 + ps_madd fp6, fp12, fp2, fp6 + psq_st fp10, 32(inv), 1, 0 + ps_nmadd fp6, fp11, fp3, fp6 + psq_st fp9, 36(inv), 1, 0 + ps_mul fp7, fp10, fp1 + ps_merge00 fp5, fp11, fp6 + psq_st fp8, 40(inv), 1, 0 + ps_merge11 fp4, fp11, fp6 + psq_st fp5, 8(inv), 0, 0 + ps_madd fp7, fp9, fp2, fp7 + psq_st fp4, 24(inv), 0, 0 + ps_nmadd fp7, fp8, fp3, fp7 + addi r3, 0, 1 + psq_st fp7, 44(inv), 1, 0 + blr + // clang-format on +} +#endif + +u32 C_MTXInvXpose(const Mtx src, Mtx invX) +{ + Mtx mTmp; + MtxPtr m; + f32 det; + + 
if (src == invX) { + m = mTmp; + } + else { + m = invX; + } + + det = src[0][0] * src[1][1] * src[2][2] + src[0][1] * src[1][2] * src[2][0] + src[0][2] * src[1][0] * src[2][1] + - src[2][0] * src[1][1] * src[0][2] - src[1][0] * src[0][1] * src[2][2] - src[0][0] * src[2][1] * src[1][2]; + + if (det == 0.0f) { + return 0; + } + + det = 1.0f / det; + + m[0][0] = (src[1][1] * src[2][2] - src[2][1] * src[1][2]) * det; + m[0][1] = -(src[1][0] * src[2][2] - src[2][0] * src[1][2]) * det; + m[0][2] = (src[1][0] * src[2][1] - src[2][0] * src[1][1]) * det; + + m[1][0] = -(src[0][1] * src[2][2] - src[2][1] * src[0][2]) * det; + m[1][1] = (src[0][0] * src[2][2] - src[2][0] * src[0][2]) * det; + m[1][2] = -(src[0][0] * src[2][1] - src[2][0] * src[0][1]) * det; + + m[2][0] = (src[0][1] * src[1][2] - src[1][1] * src[0][2]) * det; + m[2][1] = -(src[0][0] * src[1][2] - src[1][0] * src[0][2]) * det; + m[2][2] = (src[0][0] * src[1][1] - src[1][0] * src[0][1]) * det; + + m[0][3] = 0.0F; + m[1][3] = 0.0F; + m[2][3] = 0.0F; + + if (m == mTmp) { + C_MTXCopy(mTmp, invX); + } + + return 1; +} + +#ifdef GEKKO +asm u32 PSMTXInvXpose(const register Mtx src, register Mtx invX) +{ + // clang-format off + nofralloc + + psq_l fp0, 0(src), 1, 0 + psq_l fp1, 4(src), 0, 0 + psq_l fp2, 16(src), 1, 0 + ps_merge10 fp6, fp1, fp0 + psq_l fp3, 20(src), 0, 0 + psq_l fp4, 32(src), 1, 0 + ps_merge10 fp7, fp3, fp2 + psq_l fp5, 36(src), 0, 0 + ps_mul fp11, fp3, fp6 + ps_merge10 fp8, fp5, fp4 + ps_mul fp13, fp5, fp7 + ps_msub fp11, fp1, fp7, fp11 + ps_mul fp12, fp1, fp8 + ps_msub fp13, fp3, fp8, fp13 + ps_msub fp12, fp5, fp6, fp12 + ps_mul fp10, fp3, fp4 + ps_mul fp9, fp0, fp5 + ps_mul fp8, fp1, fp2 + ps_msub fp10, fp2, fp5, fp10 + ps_msub fp9, fp1, fp4, fp9 + ps_msub fp8, fp0, fp3, fp8 + ps_mul fp7, fp0, fp13 + ps_sub fp1, fp1, fp1 + ps_madd fp7, fp2, fp12, fp7 + ps_madd fp7, fp4, fp11, fp7 + ps_cmpo0 cr0, fp7, fp1 + bne _regular + addi r3, 0, 0 + blr + +_regular: + fres fp0, fp7 + psq_st fp1, 12(invX), 1, 0 + 
ps_add fp6, fp0, fp0 + ps_mul fp5, fp0, fp0 + psq_st fp1, 28(invX), 1, 0 + ps_nmsub fp0, fp7, fp5, fp6 + psq_st fp1, 44(invX), 1, 0 + ps_muls0 fp13, fp13, fp0 + ps_muls0 fp12, fp12, fp0 + ps_muls0 fp11, fp11, fp0 + psq_st fp13, 0(invX), 0, 0 + psq_st fp12, 16(invX), 0, 0 + ps_muls0 fp10, fp10, fp0 + ps_muls0 fp9, fp9, fp0 + psq_st fp11, 32(invX), 0, 0 + psq_st fp10, 8(invX), 1, 0 + ps_muls0 fp8, fp8, fp0 + addi r3, 0, 1 + psq_st fp9, 24(invX), 1, 0 + psq_st fp8, 40(invX), 1, 0 + blr + // clang-format on +} +#endif + +void C_MTXRotRad(Mtx m, char axis, f32 rad) +{ + + f32 sinA, cosA; + sinA = sinf(rad); + cosA = cosf(rad); + C_MTXRotTrig(m, axis, sinA, cosA); +} + +#ifdef GEKKO +void PSMTXRotRad(Mtx m, char axis, f32 rad) +{ + // f32 sinA, cosA; + + // sinA = sinf(rad); + // cosA = cosf(rad); + + // PSMTXRotTrig(m, axis, sinA, cosA); +} +#endif + +void C_MTXRotTrig(Mtx m, char axis, f32 sinA, f32 cosA) +{ + switch (axis) { + + case 'x': + case 'X': + m[0][0] = 1.0f; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = cosA; + m[1][2] = -sinA; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = sinA; + m[2][2] = cosA; + m[2][3] = 0.0f; + break; + + case 'y': + case 'Y': + m[0][0] = cosA; + m[0][1] = 0.0f; + m[0][2] = sinA; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = 1.0f; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = -sinA; + m[2][1] = 0.0f; + m[2][2] = cosA; + m[2][3] = 0.0f; + break; + + case 'z': + case 'Z': + m[0][0] = cosA; + m[0][1] = -sinA; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = sinA; + m[1][1] = cosA; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 1.0f; + m[2][3] = 0.0f; + break; + + default: + break; + } +} + +#ifdef GEKKO +void PSMTXRotTrig(register Mtx m, register char axis, register f32 sinA, register f32 cosA) +{ +// register f32 fc0, fc1, nsinA; +// register f32 fw0, fw1, fw2, fw3; +// // clang-format off +// asm +// { +// frsp sinA, sinA +// frsp cosA, cosA +// } + +// fc0 = 
0.0F; +// fc1 = 1.0F; +// asm +// { +// ori axis, axis, 0x20 +// ps_neg nsinA, sinA +// cmplwi axis, 'x' +// beq _case_x +// cmplwi axis, 'y' +// beq _case_y +// cmplwi axis, 'z' +// beq _case_z +// b _end + +// _case_x: +// psq_st fc1, 0(m), 1, 0 +// psq_st fc0, 4(m), 0, 0 +// ps_merge00 fw0, sinA, cosA +// psq_st fc0, 12(m), 0, 0 +// ps_merge00 fw1, cosA, nsinA +// psq_st fc0, 28(m), 0, 0 +// psq_st fc0, 44(m), 1, 0 +// psq_st fw0, 36(m), 0, 0 +// psq_st fw1, 20(m), 0, 0 +// b _end; + +// _case_y: +// ps_merge00 fw0, cosA, fc0 +// ps_merge00 fw1, fc0, fc1 +// psq_st fc0, 24(m), 0, 0 +// psq_st fw0, 0(m), 0, 0 +// ps_merge00 fw2, nsinA, fc0 +// ps_merge00 fw3, sinA, fc0 +// psq_st fw0, 40(m), 0, 0; +// psq_st fw1, 16(m), 0, 0; +// psq_st fw3, 8(m), 0, 0; +// psq_st fw2, 32(m), 0, 0; +// b _end; + +// _case_z: +// psq_st fc0, 8(m), 0, 0 +// ps_merge00 fw0, sinA, cosA +// ps_merge00 fw2, cosA, nsinA +// psq_st fc0, 24(m), 0, 0 +// psq_st fc0, 32(m), 0, 0 +// ps_merge00 fw1, fc1, fc0 +// psq_st fw0, 16(m), 0, 0 +// psq_st fw2, 0(m), 0, 0 +// psq_st fw1, 40(m), 0, 0 + +// _end: +// } +// // clang-format on +} + +#endif + +void C_MTXRotAxisRad(Mtx m, const Vec *axis, f32 rad) +{ + Vec vN; + f32 s, c; + f32 t; + f32 x, y, z; + f32 xSq, ySq, zSq; + + s = sinf(rad); + c = cosf(rad); + t = 1.0f - c; + + C_VECNormalize(axis, &vN); + + x = vN.x; + y = vN.y; + z = vN.z; + + xSq = x * x; + ySq = y * y; + zSq = z * z; + + m[0][0] = (t * xSq) + (c); + m[0][1] = (t * x * y) - (s * z); + m[0][2] = (t * x * z) + (s * y); + m[0][3] = 0.0f; + + m[1][0] = (t * x * y) + (s * z); + m[1][1] = (t * ySq) + (c); + m[1][2] = (t * y * z) - (s * x); + m[1][3] = 0.0f; + + m[2][0] = (t * x * z) - (s * y); + m[2][1] = (t * y * z) + (s * x); + m[2][2] = (t * zSq) + (c); + m[2][3] = 0.0f; +} + +#ifdef GEKKO +static void __PSMTXRotAxisRadInternal(register Mtx m, const register Vec *axis, register f32 sT, register f32 cT) +{ + register f32 tT, fc0; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + 
register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp9 = 0.5F; + tmp8 = 3.0F; + // clang-format off + asm + { + frsp cT, cT + psq_l tmp0, 0(axis), 0, 0 + frsp sT, sT + lfs tmp1, 8(axis) + ps_mul tmp2, tmp0, tmp0 + fadds tmp7, tmp9, tmp9 + ps_madd tmp3, tmp1, tmp1, tmp2 + fsubs fc0, tmp9, tmp9 + ps_sum0 tmp4, tmp3, tmp1, tmp2 + fsubs tT, tmp7, cT + frsqrte tmp5, tmp4 + fmuls tmp2, tmp5, tmp5 + fmuls tmp3, tmp5, tmp9 + fnmsubs tmp2, tmp2, tmp4, tmp8 + fmuls tmp5, tmp2, tmp3 + ps_merge00 cT, cT, cT + ps_muls0 tmp0, tmp0, tmp5 + ps_muls0 tmp1, tmp1, tmp5 + ps_muls0 tmp4, tmp0, tT + ps_muls0 tmp9, tmp0, sT + ps_muls0 tmp5, tmp1, tT + ps_muls1 tmp3, tmp4, tmp0 + ps_muls0 tmp2, tmp4, tmp0 + ps_muls0 tmp4, tmp4, tmp1 + fnmsubs tmp6, tmp1, sT, tmp3 + fmadds tmp7, tmp1, sT, tmp3 + ps_neg tmp0, tmp9 + ps_sum0 tmp8, tmp4, fc0, tmp9 + ps_sum0 tmp2, tmp2, tmp6, cT + ps_sum1 tmp3, cT, tmp7, tmp3 + ps_sum0 tmp6, tmp0, fc0 ,tmp4 + psq_st tmp8, 8(m), 0, 0 + ps_sum0 tmp0, tmp4, tmp4, tmp0 + psq_st tmp2, 0(m), 0, 0 + ps_muls0 tmp5, tmp5, tmp1 + psq_st tmp3, 16(m), 0, 0 + ps_sum1 tmp4, tmp9, tmp0, tmp4 + psq_st tmp6, 24(m), 0, 0 + ps_sum0 tmp5, tmp5, fc0, cT + psq_st tmp4, 32(m), 0, 0 + psq_st tmp5, 40(m), 0, 0 + } + // clang-format on +} + +void PSMTXRotAxisRad(Mtx m, const Vec *axis, f32 rad) +{ + // f32 sinT, cosT; + + // sinT = sinf(rad); + // cosT = cosf(rad); + + // __PSMTXRotAxisRadInternal(m, axis, sinT, cosT); +} + +#endif + +void C_MTXTrans(Mtx m, f32 xT, f32 yT, f32 zT) +{ + m[0][0] = 1.0f; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = xT; + m[1][0] = 0.0f; + m[1][1] = 1.0f; + m[1][2] = 0.0f; + m[1][3] = yT; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 1.0f; + m[2][3] = zT; +} + +#ifdef GEKKO +void PSMTXTrans(register Mtx m, register f32 xT, register f32 yT, register f32 zT) +{ + register f32 c0 = 0.0F; + register f32 c1 = 1.0F; + // clang-format off + asm + { + stfs xT, 12(m) + stfs yT, 28(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs c0, 16(m) + stfs c1, 
20(m) + stfs c0, 24(m) + stfs c1, 40(m) + stfs zT, 44(m) + stfs c1, 0(m) + } + // clang-format on +} +#endif + +void C_MTXTransApply(const Mtx src, Mtx dst, f32 xT, f32 yT, f32 zT) +{ + if (src != dst) { + dst[0][0] = src[0][0]; + dst[0][1] = src[0][1]; + dst[0][2] = src[0][2]; + dst[1][0] = src[1][0]; + dst[1][1] = src[1][1]; + dst[1][2] = src[1][2]; + dst[2][0] = src[2][0]; + dst[2][1] = src[2][1]; + dst[2][2] = src[2][2]; + } + + dst[0][3] = src[0][3] + xT; + dst[1][3] = src[1][3] + yT; + dst[2][3] = src[2][3] + zT; +} + +#ifdef GEKKO +asm void PSMTXTransApply(const register Mtx src, register Mtx dst, register f32 xT, register f32 yT, register f32 zT) +{ + // clang-format off + nofralloc; + psq_l fp4, 0(src), 0, 0 + frsp xT, xT + psq_l fp5, 8(src), 0, 0 + frsp yT, yT + psq_l fp7, 24(src), 0, 0 + frsp zT, zT + psq_l fp8, 40(src), 0, 0 + psq_st fp4, 0(dst), 0, 0 + ps_sum1 fp5, xT, fp5, fp5 + psq_l fp6, 16(src), 0, 0 + psq_st fp5, 8(dst), 0, 0 + ps_sum1 fp7, yT, fp7, fp7 + psq_l fp9, 32(src), 0, 0 + psq_st fp6, 16(dst), 0, 0 + ps_sum1 fp8, zT, fp8, fp8 + psq_st fp7, 24(dst), 0, 0 + psq_st fp9, 32(dst), 0, 0 + psq_st fp8, 40(dst), 0, 0 + blr + // clang-format on +} +#endif + +void C_MTXScale(Mtx m, f32 xS, f32 yS, f32 zS) +{ + m[0][0] = xS; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = yS; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = zS; + m[2][3] = 0.0f; +} + +#ifdef GEKKO +void PSMTXScale(register Mtx m, register f32 xS, register f32 yS, register f32 zS) +{ + register f32 c0 = 0.0F; + // clang-format off + asm + { + stfs xS, 0(m) + psq_st c0, 4(m), 0, 0 + psq_st c0, 12(m), 0, 0 + stfs yS, 20(m) + psq_st c0, 24(m), 0, 0 + psq_st c0, 32(m), 0, 0 + stfs zS, 40(m) + stfs c0, 44(m) + } + // clang-format on +} +#endif + +void C_MTXScaleApply(const Mtx src, Mtx dst, f32 xS, f32 yS, f32 zS) +{ + dst[0][0] = src[0][0] * xS; + dst[0][1] = src[0][1] * xS; + dst[0][2] = src[0][2] * xS; + dst[0][3] = 
src[0][3] * xS; + + dst[1][0] = src[1][0] * yS; + dst[1][1] = src[1][1] * yS; + dst[1][2] = src[1][2] * yS; + dst[1][3] = src[1][3] * yS; + + dst[2][0] = src[2][0] * zS; + dst[2][1] = src[2][1] * zS; + dst[2][2] = src[2][2] * zS; + dst[2][3] = src[2][3] * zS; +} + +#ifdef GEKKO +asm void PSMTXScaleApply(const register Mtx src, register Mtx dst, register f32 xS, register f32 yS, register f32 zS) +{ + // clang-format off + nofralloc; + frsp xS, xS + psq_l fp4, 0(src), 0, 0 + frsp yS, yS + psq_l fp5, 8(src), 0, 0 + frsp zS, zS + ps_muls0 fp4, fp4, xS + psq_l fp6, 16(src), 0, 0 + ps_muls0 fp5, fp5, xS + psq_l fp7, 24(src), 0, 0 + ps_muls0 fp6, fp6, yS + psq_l fp8, 32(src), 0, 0 + psq_st fp4, 0(dst), 0, 0 + ps_muls0 fp7, fp7, yS + psq_l fp2, 40(src), 0, 0 + psq_st fp5, 8(dst), 0, 0 + ps_muls0 fp8, fp8, zS + psq_st fp6, 16(dst), 0, 0 + ps_muls0 fp2, fp2, zS + psq_st fp7, 24(dst), 0, 0 + psq_st fp8, 32(dst), 0, 0 + psq_st fp2, 40(dst), 0, 0 + blr + // clang-format on +} +#endif + +void C_MTXQuat(Mtx m, const Quaternion *q) +{ + + f32 s, xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz; + s = 2.0f / ((q->x * q->x) + (q->y * q->y) + (q->z * q->z) + (q->w * q->w)); + + xs = q->x * s; + ys = q->y * s; + zs = q->z * s; + wx = q->w * xs; + wy = q->w * ys; + wz = q->w * zs; + xx = q->x * xs; + xy = q->x * ys; + xz = q->x * zs; + yy = q->y * ys; + yz = q->y * zs; + zz = q->z * zs; + + m[0][0] = 1.0f - (yy + zz); + m[0][1] = xy - wz; + m[0][2] = xz + wy; + m[0][3] = 0.0f; + + m[1][0] = xy + wz; + m[1][1] = 1.0f - (xx + zz); + m[1][2] = yz - wx; + m[1][3] = 0.0f; + + m[2][0] = xz - wy; + m[2][1] = yz + wx; + m[2][2] = 1.0f - (xx + yy); + m[2][3] = 0.0f; +} + +#ifdef GEKKO +void PSMTXQuat(register Mtx m, const register Quaternion *q) +{ + register f32 c_zero, c_one, c_two, scale; + register f32 tmp0, tmp1, tmp2, tmp3, tmp4; + register f32 tmp5, tmp6, tmp7, tmp8, tmp9; + + c_one = 1.0F; + // clang-format off + asm + { + psq_l tmp0, 0(q), 0, 0 + psq_l tmp1, 8(q), 0, 0 + fsubs c_zero, 
c_one, c_one + fadds c_two, c_one, c_one + ps_mul tmp2, tmp0, tmp0 + ps_merge10 tmp5, tmp0, tmp0 + ps_madd tmp4, tmp1, tmp1, tmp2 + ps_mul tmp3, tmp1, tmp1 + ps_sum0 scale, tmp4, tmp4, tmp4 + ps_muls1 tmp7, tmp5, tmp1 + fres tmp9, scale + ps_sum1 tmp4, tmp3, tmp4, tmp2 + ps_nmsub scale, scale, tmp9, c_two + ps_muls1 tmp6, tmp1, tmp1 + ps_mul scale, tmp9, scale + ps_sum0 tmp2, tmp2, tmp2, tmp2 + fmuls scale, scale, c_two + ps_madd tmp8, tmp0, tmp5, tmp6 + ps_msub tmp6, tmp0, tmp5, tmp6 + psq_st c_zero, 12(m), 1, 0 + ps_nmsub tmp2, tmp2, scale, c_one + ps_nmsub tmp4, tmp4, scale, c_one + psq_st c_zero, 44(m), 1, 0 + ps_mul tmp8, tmp8, scale + ps_mul tmp6, tmp6, scale + psq_st tmp2, 40(m), 1, 0 + ps_madds0 tmp5, tmp0, tmp1, tmp7 + ps_merge00 tmp1, tmp8, tmp4 + ps_nmsub tmp7, tmp7, c_two, tmp5 + ps_merge10 tmp0, tmp4, tmp6 + psq_st tmp1, 16(m), 0, 0 + ps_mul tmp5, tmp5, scale + ps_mul tmp7, tmp7, scale + psq_st tmp0, 0(m), 0, 0 + psq_st tmp5, 8(m), 1, 0 + ps_merge10 tmp3, tmp7, c_zero + ps_merge01 tmp9, tmp7, tmp5 + psq_st tmp3, 24(m), 0, 0 + psq_st tmp9, 32(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXReflect(Mtx m, const Vec *p, const Vec *n) +{ + f32 vxy, vxz, vyz, pdotn; + + vxy = -2.0f * n->x * n->y; + vxz = -2.0f * n->x * n->z; + vyz = -2.0f * n->y * n->z; + pdotn = 2.0f * C_VECDotProduct(p, n); + + m[0][0] = 1.0f - 2.0f * n->x * n->x; + m[0][1] = vxy; + m[0][2] = vxz; + m[0][3] = pdotn * n->x; + + m[1][0] = vxy; + m[1][1] = 1.0f - 2.0f * n->y * n->y; + m[1][2] = vyz; + m[1][3] = pdotn * n->y; + + m[2][0] = vxz; + m[2][1] = vyz; + m[2][2] = 1.0f - 2.0f * n->z * n->z; + m[2][3] = pdotn * n->z; +} + +#ifdef GEKKO +void PSMTXReflect(register Mtx m, const register Vec *p, const register Vec *n) +{ + register f32 c_one = 1.0F; + register f32 vn_xy, vn_z1, n2vn_xy, n2vn_z1, pdotn; + register f32 tmp0, tmp1, tmp2, tmp3; + register f32 tmp4, tmp5, tmp6, tmp7; + // clang-format off + asm + { + psq_l vn_z1, 8(n), 1, 0 + psq_l vn_xy, 0(n), 0, 0 + psq_l tmp0, 
0(p), 0, 0 + ps_nmadd n2vn_z1, vn_z1, c_one, vn_z1 + psq_l tmp1, 8(p), 1, 0 + ps_nmadd n2vn_xy, vn_xy, c_one, vn_xy + ps_muls0 tmp4, vn_xy, n2vn_z1 + ps_mul pdotn, n2vn_xy, tmp0 + ps_muls0 tmp2, vn_xy, n2vn_xy + ps_sum0 pdotn, pdotn, pdotn, pdotn + ps_muls1 tmp3, vn_xy, n2vn_xy + psq_st tmp4, 32(m), 0, 0 + ps_sum0 tmp2, tmp2, tmp2, c_one + ps_nmadd pdotn, n2vn_z1, tmp1, pdotn + ps_sum1 tmp3, c_one, tmp3, tmp3 + psq_st tmp2, 0(m), 0, 0 + ps_muls0 tmp5, vn_xy, pdotn + ps_merge00 tmp6, n2vn_z1, pdotn + psq_st tmp3, 16(m), 0, 0 + ps_merge00 tmp7, tmp4, tmp5 + ps_muls0 tmp6, tmp6, vn_z1 + ps_merge11 tmp5, tmp4, tmp5 + psq_st tmp7, 8(m), 0, 0 + ps_sum0 tmp6, tmp6, tmp6, c_one + psq_st tmp5, 24(m), 0, 0 + psq_st tmp6, 40(m), 0, 0 + } + // clang-format on +} +#endif + +void C_MTXLookAt(Mtx m, const Point3d *camPos, const Vec *camUp, const Point3d *target) +{ + // Vec vLook, vRight, vUp; + + // vLook.x = camPos->x - target->x; + // vLook.y = camPos->y - target->y; + // vLook.z = camPos->z - target->z; + // VECNormalize(&vLook, &vLook); + // VECCrossProduct(camUp, &vLook, &vRight); + // VECNormalize(&vRight, &vRight); + // VECCrossProduct(&vLook, &vRight, &vUp); + + // m[0][0] = vRight.x; + // m[0][1] = vRight.y; + // m[0][2] = vRight.z; + // m[0][3] = -(camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z); + + // m[1][0] = vUp.x; + // m[1][1] = vUp.y; + // m[1][2] = vUp.z; + // m[1][3] = -(camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z); + + // m[2][0] = vLook.x; + // m[2][1] = vLook.y; + // m[2][2] = vLook.z; + // m[2][3] = -(camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z); +} + +void C_MTXLightFrustum(Mtx m, float t, float b, float l, float r, float n, float scaleS, float scaleT, float transS, float transT) +{ + f32 tmp; + + tmp = 1.0f / (r - l); + m[0][0] = ((2 * n) * tmp) * scaleS; + m[0][1] = 0.0f; + m[0][2] = (((r + l) * tmp) * scaleS) - transS; + m[0][3] = 0.0f; + + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = ((2 * 
n) * tmp) * scaleT; + m[1][2] = (((t + b) * tmp) * scaleT) - transT; + m[1][3] = 0.0f; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = -1.0f; + m[2][3] = 0.0f; +} + +void C_MTXLightPerspective(Mtx m, f32 fovY, f32 aspect, float scaleS, float scaleT, float transS, float transT) +{ + // f32 angle; + // f32 cot; + + // angle = fovY * 0.5f; + // angle = MTXDegToRad(angle); + + // cot = 1.0f / tanf(angle); + + // m[0][0] = (cot / aspect) * scaleS; + // m[0][1] = 0.0f; + // m[0][2] = -transS; + // m[0][3] = 0.0f; + + // m[1][0] = 0.0f; + // m[1][1] = cot * scaleT; + // m[1][2] = -transT; + // m[1][3] = 0.0f; + + // m[2][0] = 0.0f; + // m[2][1] = 0.0f; + // m[2][2] = -1.0f; + // m[2][3] = 0.0f; +} + +void C_MTXLightOrtho(Mtx m, f32 t, f32 b, f32 l, f32 r, float scaleS, float scaleT, float transS, float transT) +{ + f32 tmp; + tmp = 1.0f / (r - l); + m[0][0] = (2.0f * tmp * scaleS); + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = ((-(r + l) * tmp) * scaleS) + transS; + + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = (2.0f * tmp) * scaleT; + m[1][2] = 0.0f; + m[1][3] = ((-(t + b) * tmp) * scaleT) + transT; + + m[2][0] = 0.0f; + m[2][1] = 0.0f; + m[2][2] = 0.0f; + m[2][3] = 1.0f; +} diff --git a/src/dolphin/mtx/mtx44.c b/src/dolphin/mtx/mtx44.c new file mode 100644 index 00000000..a6c2faa9 --- /dev/null +++ b/src/dolphin/mtx/mtx44.c @@ -0,0 +1,99 @@ +#include "dolphin/mtx.h" +#include "math.h" + +void C_MTXFrustum(Mtx44 m, f32 arg1, f32 arg2, f32 arg3, f32 arg4, f32 arg5, f32 arg6) +{ + f32 tmp = 1.0f / (arg4 - arg3); + m[0][0] = (2 * arg5) * tmp; + m[0][1] = 0.0f; + m[0][2] = (arg4 + arg3) * tmp; + m[0][3] = 0.0f; + tmp = 1.0f / (arg1 - arg2); + m[1][0] = 0.0f; + m[1][1] = (2 * arg5) * tmp; + m[1][2] = (arg1 + arg2) * tmp; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (arg6 - arg5); + m[2][2] = -(arg5)*tmp; + m[2][3] = -(arg6 * arg5) * tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = -1.0f; + m[3][3] = 0.0f; +} + +// Functions match but 
has issues with float constants +void C_MTXPerspective(Mtx44 m, f32 fovY, f32 aspect, f32 n, f32 f) +{ + /* fovY is halved and converted with MTXDegToRad before tanf, so cot is the cotangent of half the field of view. The function divides by tanf(angle), by aspect and by (f - n) without checking any of them for zero. */ f32 angle = fovY * 0.5f; + f32 cot; + f32 tmp; + angle = MTXDegToRad(angle); + cot = 1.0f / tanf(angle); + m[0][0] = cot / aspect; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = 0.0f; + m[1][0] = 0.0f; + m[1][1] = cot; + m[1][2] = 0.0f; + m[1][3] = 0.0f; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (f - n); + m[2][2] = -(n)*tmp; + m[2][3] = -(f * n) * tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = -1.0f; + m[3][3] = 0.0f; +} + /* C_MTXOrtho: fills all four rows of m from t, b, l, r, n, f. It divides by (r - l), (t - b) and (f - n), so equal bounds divide by zero. */ +void C_MTXOrtho(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f) +{ + f32 tmp = 1.0f / (r - l); + m[0][0] = 2.0f * tmp; + m[0][1] = 0.0f; + m[0][2] = 0.0f; + m[0][3] = -(r + l) * tmp; + tmp = 1.0f / (t - b); + m[1][0] = 0.0f; + m[1][1] = 2.0f * tmp; + m[1][2] = 0.0f; + m[1][3] = -(t + b) * tmp; + m[2][0] = 0.0f; + m[2][1] = 0.0f; + tmp = 1.0f / (f - n); + m[2][2] = -(1.0f) * tmp; + m[2][3] = -(f)*tmp; + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = 0.0f; + m[3][3] = 1.0f; +} + /* PSMTX44Copy: copies src to dest with eight paired-single load and store pairs at offsets 0x00-0x38. fp1 is the only scratch register. */ +asm void PSMTX44Copy(register Mtx44 src, register Mtx44 dest) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l fp1, 0(src), 0, 0; + psq_st fp1, 0(dest), 0, 0; + psq_l fp1, 8(src), 0, 0; + psq_st fp1, 8(dest), 0, 0; + psq_l fp1, 0x10(src), 0, 0; + psq_st fp1, 0x10(dest), 0, 0; + psq_l fp1, 0x18(src), 0, 0; + psq_st fp1, 0x18(dest), 0, 0; + psq_l fp1, 0x20(src), 0, 0; + psq_st fp1, 0x20(dest), 0, 0; + psq_l fp1, 0x28(src), 0, 0; + psq_st fp1, 0x28(dest), 0, 0; + psq_l fp1, 0x30(src), 0, 0; + psq_st fp1, 0x30(dest), 0, 0; + psq_l fp1, 0x38(src), 0, 0; + psq_st fp1, 0x38(dest), 0, 0; + blr; +#endif // clang-format on +} diff --git a/src/dolphin/mtx/mtxvec.c b/src/dolphin/mtx/mtxvec.c new file mode 100644 index 00000000..d4e70ab0 --- /dev/null +++ b/src/dolphin/mtx/mtxvec.c @@ -0,0 +1,146 @@ +#include "dolphin/mtx.h" + /* PSMTXMultVec: multiplies in by the three rows of m read at offsets 0-40 and stores one float each to out at offsets 0, 4 and 8. */ +asm void PSMTXMultVec(const register Mtx m, const register Vec* in, register Vec* out) { +#ifdef __MWERKS__ // clang-format 
off + nofralloc; + psq_l fp0, 0(in), 0, 0; + psq_l fp2, 0(m), 0, 0; + psq_l fp1, 8(in), 1, 0; + ps_mul fp4, fp2, fp0; + psq_l fp3, 8(m), 0, 0; + ps_madd fp5, fp3, fp1, fp4; + psq_l fp8, 16(m), 0, 0; + ps_sum0 fp6, fp5, fp6, fp5; + psq_l fp9, 24(m), 0, 0; + ps_mul fp10, fp8, fp0; + psq_st fp6, 0(out), 1, 0; + ps_madd fp11, fp9, fp1, fp10; + psq_l fp2, 32(m), 0, 0; + ps_sum0 fp12, fp11, fp12, fp11; + psq_l fp3, 40(m), 0, 0; + ps_mul fp4, fp2, fp0; + psq_st fp12, 4(out), 1, 0; + ps_madd fp5, fp3, fp1, fp4; + ps_sum0 fp6, fp5, fp6, fp5; + psq_st fp6, 8(out), 1, 0; + blr +#endif // clang-format on +} + /* PSMTXMultVecArray: the first source vector is loaded before the loop, each loop pass stores the previous result while starting the next one, and the last result is stored after the loop. NOTE(review): CTR is loaded with count - 1 and the loop ends on bdnz, so count looks like it must be at least 2 - confirm with callers. */ +asm void PSMTXMultVecArray(register const Mtx m, register const Vec* srcBase, register Vec* dstBase, + register u32 count) { +#ifdef __MWERKS__ // clang-format off + nofralloc + + psq_l f13, 0(m), 0, 0 + psq_l f12, 16(m), 0, 0 + addi count, count, -1 + psq_l f11, 8(m), 0, 0 + ps_merge00 f0, f13, f12 + addi dstBase, dstBase, -4 + psq_l f10, 24(m), 0, 0 + ps_merge11 f1, f13, f12 + mtctr count + psq_l f4, 32(m), 0, 0 + ps_merge00 f2, f11, f10 + psq_l f5, 40(m), 0, 0 + ps_merge11 f3, f11, f10 + psq_l f6, 0(srcBase), 0, 0 + psq_lu f7, 8(srcBase), 1, 0 + ps_madds0 f8, f0, f6, f3 + ps_mul f9, f4, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f5, f7, f9 + +lbl_80346E0C: + psq_lu f6, 4(srcBase), 0, 0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7, 8(srcBase), 1, 0 + ps_sum0 f13, f10, f9, f10 + ps_madds0 f8, f0, f6, f3 + ps_mul f9, f4, f6 + psq_stu f12, 4(dstBase), 0, 0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13, 8(dstBase), 1, 0 + ps_madd f10, f5, f7, f9 + bdnz lbl_80346E0C + + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f10 + psq_stu f12, 4(dstBase), 0, 0 + psq_stu f13, 8(dstBase), 1, 0 + blr +#endif // clang-format on +} + /* PSMTXMultVecSR: the x and y products of each row are summed first and the z product is added last. Only slot 0 of each ps_madd result is stored, so the second float of the 8(mtx), 0x18(mtx) and 0x28(mtx) pairs never reaches out. */ +asm void PSMTXMultVecSR(const register Mtx mtx, const register Vec* in, register Vec* out) { +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l fp0, 0(mtx), 0, 0; + psq_l fp6, 0(in), 0, 0; + psq_l fp2, 0x10(mtx), 0, 0; + ps_mul fp8, fp0, fp6; + psq_l 
fp4, 0x20(mtx), 0, 0; + ps_mul fp10, fp2, fp6; + psq_l fp7, 8(in), 1, 0; + ps_mul fp12, fp4, fp6; + psq_l fp3, 0x18(mtx), 0, 0; + ps_sum0 fp8, fp8, fp8, fp8; + psq_l fp5, 0x28(mtx), 0, 0; + ps_sum0 fp10, fp10, fp10, fp10; + psq_l fp1, 8(mtx), 0, 0; + ps_sum0 fp12, fp12, fp12, fp12; + ps_madd fp9, fp1, fp7, fp8; + psq_st fp9, 0(out), 1, 0; + ps_madd fp11, fp3, fp7, fp10; + psq_st fp11, 4(out), 1, 0; + ps_madd fp13, fp5, fp7, fp12; + psq_st fp13, 8(out), 1, 0; + blr +#endif // clang-format on +} + /* PSMTXMultVecArraySR: one source vector is in flight across each loop pass and the last result is stored after the loop. The third-column loads at 8(m), 24(m) and 40(m) fetch a single float each. NOTE(review): CTR is loaded with count - 1 and the loop ends on bdnz, so count looks like it must be at least 2 - confirm with callers. */ +asm void PSMTXMultVecArraySR(register const Mtx m, register const Vec* srcBase, + register Vec* dstBase, register u32 count) { +#ifdef __MWERKS__ // clang-format off + nofralloc + + psq_l f13, 0(m), 0, 0 + psq_l f12, 16(m), 0, 0 + addi count, count, -1 + psq_l f11, 8(m), 1, 0 + ps_merge00 f0, f13, f12 + addi dstBase, dstBase, -4 + psq_l f10, 24(m), 1, 0 + ps_merge11 f1, f13, f12 + mtctr count + psq_l f3, 32(m), 0, 0 + ps_merge00 f2, f11, f10 + psq_l f4, 40(m), 1, 0 + psq_l f6, 0(srcBase), 0, 0 + psq_lu f7, 8(srcBase), 1, 0 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + ps_madds1 f8, f1, f6, f8 + ps_madd f10, f4, f7, f9 + +lbl_80346EE8: + psq_lu f6, 4(srcBase), 0, 0 + ps_madds0 f12, f2, f7, f8 + psq_lu f7, 8(srcBase), 1, 0 + ps_sum0 f13, f10, f9, f9 + ps_muls0 f8, f0, f6 + ps_mul f9, f3, f6 + psq_stu f12, 4(dstBase), 0, 0 + ps_madds1 f8, f1, f6, f8 + psq_stu f13, 8(dstBase), 1, 0 + ps_madd f10, f4, f7, f9 + bdnz lbl_80346EE8 + + ps_madds0 f12, f2, f7, f8 + ps_sum0 f13, f10, f9, f9 + psq_stu f12, 4(dstBase), 0, 0 + psq_stu f13, 8(dstBase), 1, 0 + blr +#endif // clang-format on +} diff --git a/src/dolphin/mtx/psmtx.c b/src/dolphin/mtx/psmtx.c new file mode 100644 index 00000000..0a6c5276 --- /dev/null +++ b/src/dolphin/mtx/psmtx.c @@ -0,0 +1,355 @@ +#include +#include + /* qr0, qr1 and qr6 are passed as the last operand of the paired-single load and store instructions in this file. */ +#define qr0 0 +#define qr1 1 +#define qr6 6 + /* PSMTXReorder: loads six pairs from src, regroups them with ps_merge and stores six pairs to dest. This function has no nofralloc directive and no explicit blr. */ +asm void PSMTXReorder(const register Mtx src, register ROMtx dest) +{ + /* clang-format off */ + psq_l f0, 0(src), 0, qr0 + psq_l f2, 16(src), 0, qr0 + psq_l f4, 
32(src), 0, qr0 + psq_l f1, 8(src), 0, qr0 + ps_merge00 f6, f0, f2 + psq_l f3, 24(src), 0, qr0 + ps_merge01 f12, f4, f0 + psq_l f5, 40(src), 0, qr0 + ps_merge11 f7, f2, f4 + psq_st f6, 0(dest), 0, qr0 + ps_merge00 f8, f1, f3 + psq_st f12, 8(dest), 0, qr0 + ps_merge01 f9, f5, f1 + psq_st f7, 16(dest), 0, qr0 + ps_merge11 f10, f3, f5 + psq_st f8, 24(dest), 0, qr0 + psq_st f9, 32(dest), 0, qr0 + psq_st f10, 40(dest), 0, qr0 + /* clang-format on */ +} + /* PSMTXROMultVecArray: builds a 64-byte stack frame to save f14-f18, loads CTR with (count - 1) >> 1 and produces two output vectors (f15/f16 and f17/f18) per loop pass. The code after the loop tests the low bit of count to decide whether the second pair is stored. NOTE(review): CTR is zero for count below 3, which a bdnz loop does not handle - confirm the minimum count. */ +asm void PSMTXROMultVecArray(const register ROMtx m, const register Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -64(r1) + stfd f14, 8(r1) + subi r7, count, 1 + stfd f15, 16(r1) + srwi r7, r7, 1 + stfd f16, 24(r1) + stfd f17, 32(r1) + stfd f18, 40(r1) + mtctr r7 + psq_l f0, 0(m), 0, qr0 + subi srcBase, srcBase, 8 + psq_l f1, 8(m), 1, qr0 + subi dstBase, dstBase, 4 + psq_l f6, 36(m), 0, qr0 + psq_lu f8, 8(srcBase), 0, qr0 + psq_l f7, 44(m), 1, qr0 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds0 f11, f0, f8, f6 + psq_l f2, 12(m), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_l f3, 20(m), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_lu f10, 8(srcBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_l f5, 32(m), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_l f4, 24(m), 0, qr0 + ps_madds0 f13, f2, f10, f13 + psq_lu f8, 8(srcBase), 0, qr0 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 8(srcBase), 0, qr0 +loop: + ps_madds0 f11, f0, f8, f6 + psq_stu f15, 4(dstBase), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_stu f16, 8(dstBase), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_stu f17, 4(dstBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_stu f18, 8(dstBase), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_lu f8, 8(srcBase), 0, qr0 + ps_madds0 f13, f2, f10, f13 + ps_madds0 f14, f3, f10, f14 + ps_madds0 
f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 8(srcBase), 0, qr0 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 8(srcBase), 0, qr0 + bdnz loop + psq_stu f15, 4(dstBase), 0, qr0 + clrlwi. r7, count, 31 + psq_stu f16, 8(dstBase), 1, qr0 + bne exit + psq_stu f17, 4(dstBase), 0, qr0 + psq_stu f18, 8(dstBase), 1, qr0 +exit: + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + addi r1, r1, 64 + blr + /* clang-format on */ +} + /* PSMTXROSkin2VecArray: saves f14-f30 in a 160-byte frame and precomputes m1 - m0 in f22-f29. For every vertex it rebuilds the matrix as m0 + weight * (m1 - m0), using one float read through wtBase, and then transforms the vertex. NOTE(review): CTR is derived from r8 directly (subi r9, r8, 1) instead of the named count operand, which relies on count arriving in r8 - confirm. */ +asm void PSMTXROSkin2VecArray(const register ROMtx m0, const register ROMtx m1, const register f32 *wtBase, const register Vec *srcBase, + register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -160(r1) + stfd f14, 8(r1) + stfd f15, 16(r1) + stfd f16, 24(r1) + stfd f17, 32(r1) + stfd f18, 40(r1) + stfd f19, 48(r1) + stfd f20, 56(r1) + stfd f21, 64(r1) + stfd f22, 72(r1) + stfd f23, 80(r1) + stfd f24, 88(r1) + stfd f25, 96(r1) + stfd f26, 104(r1) + stfd f27, 112(r1) + stfd f28, 120(r1) + stfd f29, 128(r1) + stfd f30, 136(r1) + subi r9, r8, 1 + mtctr r9 + subi srcBase, srcBase, 4 + subi dstBase, dstBase, 4 + subi wtBase, wtBase, 4 + psq_l f14, 0(m0), 0, qr0 + psq_l f22, 0(m1), 0, qr0 + psq_l f15, 8(m0), 1, qr0 + psq_l f23, 8(m1), 1, qr0 + psq_l f16, 12(m0), 0, qr0 + psq_l f24, 12(m1), 0, qr0 + ps_sub f22, f22, f14 + psq_l f17, 20(m0), 1, qr0 + psq_l f25, 20(m1), 1, qr0 + ps_sub f23, f23, f15 + psq_l f18, 24(m0), 0, qr0 + psq_l f26, 24(m1), 0, qr0 + ps_sub f24, f24, f16 + psq_l f19, 32(m0), 1, qr0 + psq_l f27, 32(m1), 1, qr0 + ps_sub f25, f25, f17 + psq_l f20, 36(m0), 0, qr0 + psq_l f28, 36(m1), 0, qr0 + ps_sub f26, f26, f18 + psq_l f21, 44(m0), 1, qr0 + psq_l f29, 44(m1), 1, qr0 + ps_sub f27, f27, f19 + ps_sub f28, f28, f20 + ps_sub f29, f29, f21 + psq_lu f30, 4(wtBase), 1, qr0 + psq_lu f8, 4(srcBase), 0, qr0 + psq_lu f9, 8(srcBase), 1, qr0 + ps_madds0 f0, f22, f30, f14 + ps_madds0 f1, f23, f30, f15 + ps_madds0 f2, f24, f30, f16 + 
ps_madds0 f3, f25, f30, f17 + ps_madds0 f4, f26, f30, f18 + ps_madds0 f5, f27, f30, f19 + ps_madds0 f6, f28, f30, f20 + ps_madds0 f7, f29, f30, f21 + ps_madds0 f12, f0, f8, f6 + ps_madds0 f13, f1, f8, f7 + psq_lu f30, 4(wtBase), 1, qr0 +loop: + ps_madds1 f12, f2, f8, f12 + ps_madds1 f13, f3, f8, f13 + psq_lu f8, 4(srcBase), 0, qr0 + ps_madds0 f10, f4, f9, f12 + ps_madds0 f11, f5, f9, f13 + psq_lu f9, 8(srcBase), 1, qr0 + ps_madds0 f0, f22, f30, f14 + ps_madds0 f1, f23, f30, f15 + ps_madds0 f2, f24, f30, f16 + ps_madds0 f3, f25, f30, f17 + ps_madds0 f4, f26, f30, f18 + ps_madds0 f5, f27, f30, f19 + ps_madds0 f6, f28, f30, f20 + ps_madds0 f7, f29, f30, f21 + psq_stu f10, 4(dstBase), 0, qr0 + ps_madds0 f12, f0, f8, f6 + ps_madds0 f13, f1, f8, f7 + psq_stu f11, 8(dstBase), 1, qr0 + psq_lu f30, 4(wtBase), 1, qr0 + bdnz loop + ps_madds1 f12, f2, f8, f12 + ps_madds1 f13, f3, f8, f13 + ps_madds0 f10, f4, f9, f12 + psq_stu f10, 4(dstBase), 0, qr0 + ps_madds0 f11, f5, f9, f13 + psq_stu f11, 8(dstBase), 1, qr0 + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + lfd f19, 48(r1) + lfd f20, 56(r1) + lfd f21, 64(r1) + lfd f22, 72(r1) + lfd f23, 80(r1) + lfd f24, 88(r1) + lfd f25, 96(r1) + lfd f26, 104(r1) + lfd f27, 112(r1) + lfd f28, 120(r1) + lfd f29, 128(r1) + lfd f30, 136(r1) + addi r1, r1, 160 + blr + /* clang-format on */ +} + /* PSMTXROMultS16VecArray: saves f14-f18 in a 64-byte frame, loads CTR with (count - 1) >> 1 and handles two vectors per loop pass. Source pairs are read through qr6 in 4-byte steps. GQR6 is overwritten with 7 << 16 (lis r8, 7) and is not restored in this function; presumably that value selects signed 16-bit dequantized loads - confirm against the Gekko manual. */ +asm void PSMTXROMultS16VecArray(const register ROMtx m, const register S16Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + nofralloc + stwu r1, -64(r1) + stfd f14, 8(r1) + subi r7, count, 1 + stfd f15, 16(r1) + srwi r7, r7, 1 + stfd f16, 24(r1) + lis r8, 7 + stfd f17, 32(r1) + mtspr GQR6, r8 + stfd f18, 40(r1) + mtctr r7 + psq_l f0, 0(m), 0, qr0 + subi srcBase, srcBase, 4 + psq_l f1, 8(m), 1, qr0 + subi dstBase, dstBase, 4 + psq_l f6, 36(m), 0, qr0 + psq_lu f8, 4(srcBase), 0, qr6 + psq_l f7, 44(m), 1, qr0 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds0 f11, f0, f8, f6 + psq_l 
f2, 12(m), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_l f3, 20(m), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_lu f10, 4(srcBase), 0, qr6 + ps_madds1 f14, f1, f9, f7 + psq_l f5, 32(m), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_l f4, 24(m), 0, qr0 + ps_madds0 f13, f2, f10, f13 + psq_lu f8, 4(srcBase), 0, qr6 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 4(srcBase), 0, qr6 +loop: + ps_madds0 f11, f0, f8, f6 + psq_stu f15, 4(dstBase), 0, qr0 + ps_madds0 f12, f1, f8, f7 + psq_stu f16, 8(dstBase), 1, qr0 + ps_madds1 f13, f0, f9, f6 + psq_stu f17, 4(dstBase), 0, qr0 + ps_madds1 f14, f1, f9, f7 + psq_stu f18, 8(dstBase), 1, qr0 + ps_madds1 f11, f2, f8, f11 + ps_madds1 f12, f3, f8, f12 + psq_lu f8, 4(srcBase), 0, qr6 + ps_madds0 f13, f2, f10, f13 + ps_madds0 f14, f3, f10, f14 + ps_madds0 f15, f4, f9, f11 + ps_madds0 f16, f5, f9, f12 + psq_lu f9, 4(srcBase), 0, qr6 + ps_madds1 f17, f4, f10, f13 + ps_madds1 f18, f5, f10, f14 + psq_lu f10, 4(srcBase), 0, qr6 + bdnz loop + psq_stu f15, 4(dstBase), 0, qr0 + clrlwi. 
r7, count, 31 + psq_stu f16, 8(dstBase), 1, qr0 + bne exit + psq_stu f17, 4(dstBase), 0, qr0 + psq_stu f18, 8(dstBase), 1, qr0 +exit: + lfd f14, 8(r1) + lfd f15, 16(r1) + lfd f16, 24(r1) + lfd f17, 32(r1) + lfd f18, 40(r1) + addi r1, r1, 64 + blr + /* clang-format on */ +} + /* PSMTXMultS16VecArray: m is declared Mtx44 but only offsets 0-40 (three rows) are read. GQR6 is set to 7 << 16 and used for the source loads; CTR is loaded with count - 1. NOTE(review): the pre-loop load psq_lu f6, 2(srcBase) names qr1 while the matching load inside the loop names qr6, and nothing in this function sets GQR1 - confirm whether qr1 is intended. No nofralloc or blr appears in this function. */ +asm void PSMTXMultS16VecArray(const register Mtx44 m, const register S16Vec *srcBase, register Vec *dstBase, register u32 count) +{ + /* clang-format off */ + psq_l f0, 0(m), 0, qr0 + lis r7, 7 + mtspr GQR6, r7 + psq_l f6, 0(srcBase), 0, qr6 + subi count, count, 1 + psq_l f7, 4(srcBase), 1, qr6 + mtctr count + psq_l f1, 8(m), 0, qr0 + addi srcBase, srcBase, 4 + psq_l f2, 16(m), 0, qr0 + subi dstBase, dstBase, 4 + psq_l f3, 24(m), 0, qr0 + ps_mul f8, f0, f6 + psq_l f4, 32(m), 0, qr0 + ps_mul f10, f2, f6 + psq_l f5, 40(m), 0, qr0 + ps_mul f12, f4, f6 + psq_lu f6, 2(srcBase), 0, qr1 + ps_madd f8, f1, f7, f8 + ps_madd f10, f3, f7, f10 + ps_madd f12, f5, f7, f12 + psq_lu f7, 4(srcBase), 1, qr6 + ps_sum0 f9, f8, f8, f8 +loop: + ps_sum0 f11, f10, f10, f10 + ps_mul f8, f0, f6 + ps_sum0 f13, f12, f12, f12 + ps_mul f10, f2, f6 + psq_stu f9, 4(dstBase), 1, qr0 + ps_mul f12, f4, f6 + psq_stu f11, 4(dstBase), 1, qr0 + ps_madd f8, f1, f7, f8 + psq_stu f13, 4(dstBase), 1, qr0 + ps_madd f10, f3, f7, f10 + psq_lu f6, 2(srcBase), 0, qr6 + ps_madd f12, f5, f7, f12 + psq_lu f7, 4(srcBase), 1, qr6 + ps_sum0 f9, f8, f8, f8 + bdnz loop + ps_sum0 f11, f10, f10, f10 + ps_sum0 f13, f12, f12, f12 + psq_stu f9, 4(dstBase), 1, qr0 + psq_stu f11, 4(dstBase), 1, qr0 + psq_stu f13, 4(dstBase), 1, qr0 + /* clang-format on */ +} diff --git a/src/dolphin/mtx/quat.c b/src/dolphin/mtx/quat.c new file mode 100644 index 00000000..a874e14c --- /dev/null +++ b/src/dolphin/mtx/quat.c @@ -0,0 +1,80 @@ +#include "dolphin/mtx.h" +#include "math.h" + /* PSQUATMultiply: combines a and b into ab with paired-single arithmetic. All four pairs of a and b are loaded before either store to ab. */ +void PSQUATMultiply(register const Quaternion *a, register const Quaternion *b, register Quaternion *ab) +{ + asm { + psq_l f0, 0(a), 0, 0 + psq_l f1, 8(a), 0, 0 + psq_l f2, 0(b), 0, 0 + 
ps_neg f5, f0 + psq_l f3, 8(b), 0, 0 + ps_neg f6, f1 + ps_merge01 f4, f5, f0 + ps_muls0 f7, f1, f2 + ps_muls0 f5, f5, f2 + ps_merge01 f1, f6, f1 + ps_muls1 f8, f4, f2 + ps_madds0 f7, f4, f3, f7 + ps_muls1 f2, f1, f2 + ps_madds0 f5, f1, f3, f5 + ps_madds1 f8, f6, f3, f8 + ps_merge10 f7, f7, f7 + ps_madds1 f2, f0, f3, f2 + ps_merge10 f5, f5, f5 + ps_add f7, f7, f2 + psq_st f7, 0(ab), 0, 0 + ps_sub f5, f5, f8 + psq_st f5, 8(ab), 0, 0 + } +} + /* C_QUATRotAxisRad: normalizes axis into the local dst with PSVECNormalize, then writes q = (sin(rad / 2) * dst, cos(rad / 2)). axis itself is not modified. */ +void C_QUATRotAxisRad(Quaternion *q, const Vec *axis, f32 rad) +{ + f32 tmp, tmp2, tmp3; + Vec dst; + + tmp = rad; + PSVECNormalize(axis, &dst); + + tmp2 = tmp * 0.5f; + tmp3 = sinf(tmp * 0.5f); + tmp = tmp3; + tmp3 = cosf(tmp2); + + q->x = tmp * dst.x; + q->y = tmp * dst.y; + q->z = tmp * dst.z; + q->w = tmp3; +} + /* C_QUATSlerp: blends p and q into r with the weights ratioA and value. A negative four-component dot product is negated and flips the sign of the q weight. Up to a dot product of 0.9999899864196777 the weights are sin((1 - t) * theta) / sin(theta) and sin(t * theta) / sin(theta); above it they are the plain 1 - t and t, so sinHalfTheta is not used as a divisor on that path. */ +void C_QUATSlerp(const Quaternion *p, const Quaternion *q, Quaternion *r, f32 t) +{ + f32 ratioA, ratioB; + + f32 value = 1.0f; + f32 cosHalfTheta = p->x * q->x + p->y * q->y + p->z * q->z + p->w * q->w; + + if (cosHalfTheta < 0.0f) { + cosHalfTheta = -cosHalfTheta; + value = -value; + } + + if (cosHalfTheta <= 0.9999899864196777f) { + f32 halfTheta = acosf(cosHalfTheta); + f32 sinHalfTheta = sinf(halfTheta); + + ratioA = sinf((1.0f - t) * halfTheta) / sinHalfTheta; + ratioB = sinf(t * halfTheta) / sinHalfTheta; + value *= ratioB; + } + else { + ratioA = 1.0f - t; + value *= t; + } + + r->x = (ratioA * p->x) + (value * q->x); + r->y = (ratioA * p->y) + (value * q->y); + r->z = (ratioA * p->z) + (value * q->z); + r->w = (ratioA * p->w) + (value * q->w); +} diff --git a/src/dolphin/mtx/vec.c b/src/dolphin/mtx/vec.c new file mode 100644 index 00000000..985a6196 --- /dev/null +++ b/src/dolphin/mtx/vec.c @@ -0,0 +1,287 @@ +#include "dolphin/mtx.h" +#include "math.h" + /* Aliases for floating-point register names, used as operands by the asm in this file. */ +#define R_RET fp1 +#define FP2 fp2 +#define FP3 fp3 +#define FP4 fp4 +#define FP5 fp5 +#define FP6 fp6 +#define FP7 fp7 +#define FP8 fp8 +#define FP9 fp9 +#define FP10 fp10 +#define FP11 fp11 +#define FP12 fp12 +#define FP13 fp13 + /* PSVECAdd: ret = vec1 + vec2, computed as an x and y pair followed by a single z. */ +asm void PSVECAdd(const 
register Vec *vec1, const register Vec *vec2, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_add FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_add FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +#endif // clang-format on +} + /* PSVECSubtract: ret = vec1 - vec2, computed as an x and y pair followed by a single z. */ +asm void PSVECSubtract(const register Vec *vec1, const register Vec *vec2, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l FP2, 0(vec1), 0, 0; + psq_l FP4, 0(vec2), 0, 0; + ps_sub FP6, FP2, FP4; + psq_st FP6, 0(ret), 0, 0; + psq_l FP3, 8(vec1), 1, 0; + psq_l FP5, 8(vec2), 1, 0; + ps_sub FP7, FP3, FP5; + psq_st FP7, 8(ret), 1, 0; + blr +#endif // clang-format on +} + /* PSVECScale: multiplies src by slot 0 of f1 and stores the result to dst. The asm names f1 directly rather than the scale operand, so it relies on scale being passed in f1. */ +asm void PSVECScale(register const Vec *src, register Vec *dst, register f32 scale) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 0(src), 0, 0 + psq_l f2, 8(src), 1, 0 + ps_muls0 f0, f0, f1 + psq_st f0, 0(dst), 0, 0 + ps_muls0 f0, f2, f1 + psq_st f0, 8(dst), 1, 0 + blr +#endif // clang-format on +} + /* C_VECScale: NOTE(review) - the scale parameter is never read. The body multiplies src by 1 / sqrtf(x*x + y*y + z*z), so it writes the normalized src to dst, and a zero-length src divides by zero. Confirm whether this is really a normalize routine that was given the wrong name and signature. */ +void C_VECScale(const Vec *src, Vec *dst, f32 scale) +{ + f32 s; + + s = 1.0f / sqrtf(src->z * src->z + src->x * src->x + src->y * src->y); + dst->x = src->x * s; + dst->y = src->y * s; + dst->z = src->z * s; +} + /* PSVECNormalize: scales vec1 by a reciprocal square root of its squared length and stores the result to ret. frsqrte gives an estimate est, which is refined once as est * 0.5 * (3 - est * est * square_sum) before the multiply. The whole body sits inside the __MWERKS__ guard, so other compilers get an empty function. */ +void PSVECNormalize(const register Vec *vec1, register Vec *ret) +{ +#ifdef __MWERKS__ // clang-format off + register f32 half = 0.5f; + register f32 three = 3.0f; + register f32 xx_zz, xx_yy; + register f32 square_sum; + register f32 ret_sqrt; + register f32 n_0, n_1; + asm { + psq_l FP2, 0(vec1), 0, 0; + ps_mul xx_yy, FP2, FP2; + psq_l FP3, 8(vec1), 1, 0; + ps_madd xx_zz, FP3, FP3, xx_yy; + ps_sum0 square_sum, xx_zz, FP3, xx_yy; + frsqrte ret_sqrt, square_sum; + fmuls n_0, ret_sqrt, ret_sqrt; + fmuls n_1, ret_sqrt, half; + fnmsubs n_0, n_0, square_sum, three; + fmuls ret_sqrt, n_0, n_1; + ps_muls0 FP2, FP2, ret_sqrt; + psq_st FP2, 0(ret), 0, 0; + ps_muls0 FP3, FP3, 
ret_sqrt; + psq_st FP3, 8(ret), 1, 0; + } +#endif // clang-format on +} + /* PSVECSquareMag: returns x*x + y*y + z*z of v in f1. */ +asm f32 PSVECSquareMag(register const Vec *v) { +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 0(v), 0, 0 + ps_mul f0, f0, f0 + lfs f1, 8(v) + ps_madd f1, f1, f1, f0 + ps_sum0 f1, f1, f0, f0 + blr +#endif // clang-format on +} + /* PSVECMag: computes the squared length of v, refines an frsqrte estimate once and returns squared length times that reciprocal square root. fsel replaces the refined estimate with square_mag when the estimate is not greater than or equal to zero, presumably so a zero-length vector returns 0 - confirm. NOTE(review): the local named zero is declared but never used, and outside __MWERKS__ square_mag is returned without ever being assigned. */ +f32 PSVECMag(const register Vec *v) +{ + register f32 v_xy, v_zz, square_mag; + register f32 ret_mag, n_0, n_1; + register f32 three, half, zero; +#ifdef __MWERKS__ // clang-format off + asm { + psq_l v_xy, 0(v), 0, 0 + ps_mul v_xy, v_xy, v_xy + lfs v_zz, 8(v) + ps_madd square_mag, v_zz, v_zz, v_xy + } +#endif // clang-format on + half = 0.5f; +#ifdef __MWERKS__ // clang-format off + asm { + ps_sum0 square_mag, square_mag, v_xy, v_xy + frsqrte ret_mag, square_mag + } +#endif // clang-format on + three = 3.0f; +#ifdef __MWERKS__ // clang-format off +asm { + fmuls n_0, ret_mag, ret_mag + fmuls n_1, ret_mag, half + fnmsubs n_0, n_0, square_mag, three + fmuls ret_mag, n_0, n_1 + fsel ret_mag, ret_mag, ret_mag, square_mag + fmuls square_mag, square_mag, ret_mag + } +#endif // clang-format on + return square_mag; +} + /* PSVECDotProduct: returns the dot product of the two vectors in f1. The asm addresses r3 and r4 directly rather than the vec1 and vec2 operands. */ +asm f32 PSVECDotProduct(const register Vec *vec1, const register Vec *vec2) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc; + psq_l f2, 4(r3), 0, 0 /* qr0 */ + psq_l f3, 4(r4), 0, 0 /* qr0 */ + ps_mul f2, f2, f3 + psq_l f5, 0(r3), 0, 0 /* qr0 */ + psq_l f4, 0(r4), 0, 0 /* qr0 */ + ps_madd f3, f5, f4, f2 + ps_sum0 f1, f3, f2, f2 + blr +#endif // clang-format on +} + /* PSVECCrossProduct: a and b are fully loaded before the first store to axb. The result is written as one float at offset 0 and a pair at offset 4. */ +asm void PSVECCrossProduct(register const Vec *a, register const Vec *b, register Vec *axb) +{ +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f1, 0(b), 0, 0 + lfs f2, 8(a) + psq_l f0, 0(a), 0, 0 + ps_merge10 f6, f1, f1 + lfs f3, 8(b) + ps_mul f4, f1, f2 + ps_muls0 f7, f1, f0 + ps_msub f5, f0, f3, f4 + ps_msub f8, f0, f6, f7 + ps_merge11 f9, f5, f5 + ps_merge01 f10, f5, f8 + psq_st f9, 0(axb), 1, 0 + ps_neg f10, f10 + psq_st f10, 4(axb), 0, 0 + blr +#endif // 
clang-format on +} + /* C_VECHalfAngle: negates a and b into local copies, normalizes both, adds them and writes the normalized sum to half. When the sum has zero squared length it is copied to half unnormalized instead. a and b are not modified. */ +void C_VECHalfAngle(const Vec *a, const Vec *b, Vec *half) +{ + Vec a0; + Vec b0; + Vec ab; + + a0.x = -a->x; + a0.y = -a->y; + a0.z = -a->z; + + b0.x = -b->x; + b0.y = -b->y; + b0.z = -b->z; + + VECNormalize(&a0, &a0); + VECNormalize(&b0, &b0); + VECAdd(&a0, &b0, &ab); + + if (VECDotProduct(&ab, &ab) > 0.0f) { + VECNormalize(&ab, half); + } + else { + *half = ab; + } +} + /* C_VECReflect: NOTE(review) - every statement of the body below is commented out, so a call currently leaves dst untouched. The disabled code normalizes the negated src (a0) and normal (b0), computes 2 * dot(a0, b0) * b0 - a0 and normalizes the result into dst. */ +void C_VECReflect(const Vec *src, const Vec *normal, Vec *dst) +{ + // Vec a0; + // Vec b0; + // f32 dot; + + // a0.x = -src->x; + // a0.y = -src->y; + // a0.z = -src->z; + + // VECNormalize(&a0, &a0); + // VECNormalize(normal, &b0); + + // dot = VECDotProduct(&a0, &b0); + // dst->x = b0.x * 2.0f * dot - a0.x; + // dst->y = b0.y * 2.0f * dot - a0.y; + // dst->z = b0.z * 2.0f * dot - a0.z; + + // VECNormalize(dst, dst); +} + /* PSVECSquareDistance: returns the squared length of a - b in f1. */ +asm f32 PSVECSquareDistance(register const Vec *a, register const Vec *b) { +#ifdef __MWERKS__ // clang-format off + nofralloc + psq_l f0, 4(a), 0, 0 + psq_l f1, 4(b), 0, 0 + ps_sub f2, f0, f1 + psq_l f0, 0(a), 0, 0 + psq_l f1, 0(b), 0, 0 + ps_mul f2, f2, f2 + ps_sub f0, f0, f1 + ps_madd f1, f0, f0, f2 + ps_sum0 f1, f1, f2, f2 + blr +#endif // clang-format on +} + /* PSVECDistance: computes the squared length of a - b in f0 and multiplies it by a once-refined frsqrte estimate. The asm uses f0-f2 by name alongside the register variables. NOTE(review): return dist sits inside the __MWERKS__ guard, so other compilers see a non-void function with no return statement. */ +f32 PSVECDistance(register const Vec *a, register const Vec *b) +{ + + register f32 half_c; + register f32 three_c; + register f32 dist; + +#ifdef __MWERKS__ // clang-format off + asm { + psq_l f0, 4(a), 0, 0 /* qr0 */ + psq_l f1, 4(b), 0, 0 /* qr0 */ + ps_sub f2, f0, f1 + psq_l f0, 0(a), 0, 0 /* qr0 */ + psq_l f1, 0(b), 0, 0 /* qr0 */ + ps_mul f2, f2, f2 + ps_sub f0, f0, f1 + } + + half_c = 0.5f; + + asm { + ps_madd f0, f0, f0, f2 + ps_sum0 f0, f0, f2, f2 + } + + three_c = 3.0f; + + asm { + frsqrte dist, f0 + fmuls f2, dist, dist + fmuls dist, dist, half_c + fnmsubs f2, f2, f0, three_c + fmuls dist, f2, dist + fsel dist, dist, dist, f0 + fmuls dist, f0, dist + } + + return dist; +#endif // clang-format on +} diff --git a/src/dolphin/pad/Pad.c b/src/dolphin/pad/Pad.c new file mode 100644 
index 00000000..1852cf0d --- /dev/null +++ b/src/dolphin/pad/Pad.c @@ -0,0 +1,783 @@ +#include +#include + /* UnkVal and __OSWirelessPadFixMode are declared with the compiler-specific colon syntax that pins them to fixed addresses (OS_BASE_CACHED | 0x30e3 and OS_BASE_CACHED | 0x30E0). */ +u8 UnkVal : (OS_BASE_CACHED | 0x30e3); +u16 __OSWirelessPadFixMode : (OS_BASE_CACHED | 0x30E0); + /* Forward declarations. NOTE(review): each of the six prototypes below is declared twice. */ +static void PADTypeAndStatusCallback(s32 chan, u32 type); +static void PADOriginCallback(s32 chan, u32 error, OSContext *context); +static void PADProbeCallback(s32 chan, u32 error, OSContext *context); +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void PADTypeAndStatusCallback(s32 chan, u32 type); + +static void PADOriginCallback(s32 chan, u32 error, OSContext *context); +static void PADProbeCallback(s32 chan, u32 error, OSContext *context); + +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, u32 data[2]); + +static BOOL Initialized; + /* Per-channel state bitmasks; the bit for a channel is PAD_CHAN0_BIT >> chan. ResettingChan holds 32 while no channel reset is in flight. */ +static u32 EnabledBits; +static u32 ResettingBits; +static s32 ResettingChan = 32; +static u32 RecalibrateBits; +static u32 WaitingBits; +static u32 CheckingBits; +static u32 PendingBits; + +static u32 XPatchBits = PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT; + /* AnalogMode is OR-ed into every polling command; bits 8-10 select which origin fields UpdateOrigin masks. */ +static u32 AnalogMode = 0x00000300u; + /* Spec is the active controller spec and MakeStatus the matching response decoder; both are changed together by PADSetSpec. */ +u32 __PADSpec; +static u32 Spec = 5; +static void (*MakeStatus)(s32, PADStatus *, u32[2]) = SPEC2_MakeStatus; + +static u32 Type[SI_MAX_CHAN]; +static PADStatus Origin[SI_MAX_CHAN]; + /* Command words handed to SITransfer; CmdProbeDevice is filled in per channel by PADInit. */ +static u32 CmdReadOrigin = 0x41 << 24; +static u32 CmdCalibrate = 0x42 << 24; +static u32 CmdProbeDevice[SI_MAX_CHAN]; + +static BOOL OnReset(BOOL final); + +static OSResetFunctionInfo ResetFunctionInfo = { OnReset, 127 }; + +static void (*SamplingCallback)(void); + /* PADEnable: marks chan enabled, reads its response into a scratch buffer, sets the polling command to (0x40 << 16) | AnalogMode and enables polling for all enabled channels. */ +static void PADEnable(s32 chan) +{ + u32 cmd; + u32 chanBit; + u32 data[2]; + + chanBit = PAD_CHAN0_BIT >> chan; + EnabledBits |= chanBit; 
+ SIGetResponse(chan, data); + cmd = (0x40 << 16) | AnalogMode; + SISetCommand(chan, cmd); + SIEnablePolling(EnabledBits); +} + /* PADDisable: with interrupts disabled, stops polling chan, clears its bit from EnabledBits, WaitingBits, CheckingBits and PendingBits and sets its wireless id to 0. */ +static void PADDisable(s32 chan) +{ + BOOL enabled; + u32 chanBit; + + enabled = OSDisableInterrupts(); + + chanBit = PAD_CHAN0_BIT >> chan; + SIDisablePolling(chanBit); + EnabledBits &= ~chanBit; + WaitingBits &= ~chanBit; + CheckingBits &= ~chanBit; + PendingBits &= ~chanBit; + OSSetWirelessID(chan, 0); + + OSRestoreInterrupts(enabled); +} + /* DoReset: picks the next channel from ResettingBits with __cntlzw (a result of 32 means none are pending), clears its bit, zeroes its Origin entry and starts an asynchronous type query that continues in PADTypeAndStatusCallback. */ +static void DoReset(void) +{ + u32 chanBit; + + ResettingChan = __cntlzw(ResettingBits); + if (ResettingChan == 32) { + return; + } + + chanBit = PAD_CHAN0_BIT >> ResettingChan; + ResettingBits &= ~chanBit; + + memset(&Origin[ResettingChan], 0, sizeof(PADStatus)); + SIGetTypeAsync(ResettingChan, PADTypeAndStatusCallback); +} + /* UpdateOrigin: post-processes Origin[chan]. Depending on bits 8-10 of AnalogMode the low four bits of some trigger, substick and analog fields are cleared; modes 0x300 and 0x400 clear nothing. Then 128 is subtracted from all four stick axes. If the channel is in XPatchBits, a stickX above 64 is forced to 0 when the upper 16 bits of the SI type equal SI_GC_CONTROLLER. */ +static void UpdateOrigin(s32 chan) +{ + PADStatus *origin; + u32 chanBit = PAD_CHAN0_BIT >> chan; + + origin = &Origin[chan]; + switch (AnalogMode & 0x00000700u) { + case 0x00000000u: + case 0x00000500u: + case 0x00000600u: + case 0x00000700u: + origin->triggerL &= ~15; + origin->triggerR &= ~15; + origin->analogA &= ~15; + origin->analogB &= ~15; + break; + case 0x00000100u: + origin->substickX &= ~15; + origin->substickY &= ~15; + origin->analogA &= ~15; + origin->analogB &= ~15; + break; + case 0x00000200u: + origin->substickX &= ~15; + origin->substickY &= ~15; + origin->triggerL &= ~15; + origin->triggerR &= ~15; + break; + case 0x00000300u: + break; + case 0x00000400u: + break; + } + + origin->stickX -= 128; + origin->stickY -= 128; + origin->substickX -= 128; + origin->substickY -= 128; + + if (XPatchBits & chanBit) { + if (64 < origin->stickX && (SIGetType(chan) & 0xffff0000) == SI_GC_CONTROLLER) { + origin->stickX = 0; + } + } +} + /* PADOriginCallback: SITransfer completion handler used during a reset. It ignores its chan argument and acts on ResettingChan. On success it updates the origin and enables the pad, and it always moves on to the next pending channel via DoReset. */ +static void PADOriginCallback(s32 chan, u32 error, OSContext *context) +{ + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + UpdateOrigin(ResettingChan); + PADEnable(ResettingChan); 
+ } + DoReset(); +} + /* PADOriginUpdateCallback: completion handler for origin reads issued outside a reset. It does nothing if chan is no longer enabled; otherwise it updates the origin on success and disables the pad when the error has SI_ERROR_NO_RESPONSE set. */ +static void PADOriginUpdateCallback(s32 chan, u32 error, OSContext *context) +{ + + if (!(EnabledBits & (PAD_CHAN0_BIT >> chan))) { + return; + } + + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + UpdateOrigin(chan); + } + + if (error & SI_ERROR_NO_RESPONSE) { + PADDisable(chan); + } +} + /* PADProbeCallback: completion handler for the probe command. On success it enables ResettingChan and marks it in WaitingBits; it always continues with DoReset. The chan argument is not used. */ +static void PADProbeCallback(s32 chan, u32 error, OSContext *context) +{ + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION))) { + PADEnable(ResettingChan); + WaitingBits |= PAD_CHAN0_BIT >> ResettingChan; + } + DoReset(); +} + /* PADTypeAndStatusCallback: continuation of DoReset once the SI type is known. The low byte of type carries the error bits and the rest is stored in Type[ResettingChan]. Errors and devices that are not standard GC controllers just move on to the next channel; a Spec below PAD_SPEC_2 enables the pad directly; otherwise an origin read, calibrate or probe transfer is started depending on the wireless flags. The chan argument is not used. */ +static void PADTypeAndStatusCallback(s32 chan, u32 type) +{ + u32 chanBit; + u32 recalibrate; + BOOL rc = TRUE; + u32 error; + chanBit = PAD_CHAN0_BIT >> ResettingChan; + error = type & 0xFF; + recalibrate = RecalibrateBits & chanBit; + RecalibrateBits &= ~chanBit; + + if (error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION)) { + DoReset(); + return; + } + + type &= ~0xFF; + + Type[ResettingChan] = type; + + if ((type & SI_TYPE_MASK) != SI_TYPE_GC || !(type & SI_GC_STANDARD)) { + DoReset(); + return; + } + + if (Spec < PAD_SPEC_2) { + PADEnable(ResettingChan); + DoReset(); + return; + } + + if (!(type & SI_GC_WIRELESS) || (type & SI_WIRELESS_IR)) { + if (recalibrate) { + rc = SITransfer(ResettingChan, &CmdCalibrate, 3, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + else { + rc = SITransfer(ResettingChan, &CmdReadOrigin, 1, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + } + else if ((type & SI_WIRELESS_FIX_ID) && (type & SI_WIRELESS_CONT_MASK) == SI_WIRELESS_CONT && !(type & SI_WIRELESS_LITE)) { + if (type & SI_WIRELESS_RECEIVED) { + rc = SITransfer(ResettingChan, &CmdReadOrigin, 1, &Origin[ResettingChan], 10, PADOriginCallback, 0); + } + else { + rc = SITransfer(ResettingChan, &CmdProbeDevice[ResettingChan], 3, &Origin[ResettingChan], 8, PADProbeCallback, 0); + } + } + 
/* A failed SITransfer marks the channel pending and moves on. rc is still TRUE here when no transfer was attempted; in that case nothing below runs and DoReset is not called from this function. */ if (!rc) { + PendingBits |= chanBit; + DoReset(); + return; + } +} + /* PADReceiveCheckCallback: type-query completion handler started by PADRead for a channel in WaitingBits. It clears the waiting and checking bits, then either requests the origin (no error and the wireless flags required by the condition below) or disables the pad. */ +static void PADReceiveCheckCallback(s32 chan, u32 type) +{ + u32 error; + u32 chanBit; + + chanBit = PAD_CHAN0_BIT >> chan; + if (!(EnabledBits & chanBit)) { + return; + } + + error = type & 0xFF; + type &= ~0xFF; + + WaitingBits &= ~chanBit; + CheckingBits &= ~chanBit; + + if (!(error & (SI_ERROR_UNDER_RUN | SI_ERROR_OVER_RUN | SI_ERROR_NO_RESPONSE | SI_ERROR_COLLISION)) && (type & SI_GC_WIRELESS) + && (type & SI_WIRELESS_FIX_ID) && (type & SI_WIRELESS_RECEIVED) && !(type & SI_WIRELESS_IR) + && (type & SI_WIRELESS_CONT_MASK) == SI_WIRELESS_CONT && !(type & SI_WIRELESS_LITE)) { + SITransfer(chan, &CmdReadOrigin, 1, &Origin[chan], 10, PADOriginUpdateCallback, 0); + } + else { + PADDisable(chan); + } +} + /* PADReset: queues the channels in mask (plus any in PendingBits, minus those waiting or being checked) for reset, stops polling them, and starts the reset chain if none is running (ResettingChan == 32). With Spec == PAD_SPEC_4 the channels are also flagged for recalibration. Always returns TRUE. NOTE(review): the local diableBits is a misspelling of disableBits, the name PADRecalibrate uses for the same value. */ +BOOL PADReset(u32 mask) +{ + BOOL enabled; + u32 diableBits; + + enabled = OSDisableInterrupts(); + + mask |= PendingBits; + PendingBits = 0; + mask &= ~(WaitingBits | CheckingBits); + ResettingBits |= mask; + diableBits = ResettingBits & EnabledBits; + EnabledBits &= ~mask; + + if (Spec == PAD_SPEC_4) { + RecalibrateBits |= mask; + } + + SIDisablePolling(diableBits); + + if (ResettingChan == 32) { + DoReset(); + } + OSRestoreInterrupts(enabled); + return TRUE; +} + /* PADRecalibrate: same sequence as PADReset, except that the channels are flagged in RecalibrateBits unless bit 0x40 of UnkVal is set. Always returns TRUE. */ +BOOL PADRecalibrate(u32 mask) +{ + BOOL enabled; + u32 disableBits; + + enabled = OSDisableInterrupts(); + + mask |= PendingBits; + PendingBits = 0; + mask &= ~(WaitingBits | CheckingBits); + ResettingBits |= mask; + disableBits = ResettingBits & EnabledBits; + EnabledBits &= ~mask; + + if (!(UnkVal & 0x40)) { + RecalibrateBits |= mask; + } + + SIDisablePolling(disableBits); + if (ResettingChan == 32) { + DoReset(); + } + OSRestoreInterrupts(enabled); + return TRUE; +} + /* PADInit: one-time setup. Returns TRUE immediately if already initialized; otherwise applies a pending __PADSpec, builds the per-channel probe commands, registers the reset function and finishes with PADReset on all four channels. */ +BOOL PADInit() +{ + s32 chan; + if (Initialized) { + return TRUE; + } + + if (__PADSpec) { + PADSetSpec(__PADSpec); + } + + Initialized = TRUE; + + if (__PADFixBits != 0) { + OSTime time = OSGetTime(); + __OSWirelessPadFixMode = (u16)((((time)&0xffff) + ((time >> 
16) & 0xffff) + ((time >> 32) & 0xffff) + ((time >> 48) & 0xffff)) & 0x3fffu); + RecalibrateBits = PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT; + } + + for (chan = 0; chan < SI_MAX_CHAN; ++chan) { + CmdProbeDevice[chan] = (0x4D << 24) | (chan << 22) | ((__OSWirelessPadFixMode & 0x3fffu) << 8); + } + + SIRefreshSamplingRate(); + OSRegisterResetFunction(&ResetFunctionInfo); + + return PADReset(PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT); +} + /* Local offsetof: byte offset of memb inside type, computed from a null pointer. NOTE(review): this would clash with the standard macro if stddef.h is ever included here - confirm. */ +#define offsetof(type, memb) ((u32) & ((type *)0)->memb) + /* PADRead: fills SI_MAX_CHAN consecutive PADStatus entries starting at status, one per channel, with interrupts disabled for the duration. Every early-out sets err and zeroes the bytes that precede err. Returns a bitmask of the channels that reached the SI_GC_NOMOTOR test and lack that flag. NOTE(review): chanShift is assigned on each pass but never read. */ +u32 PADRead(PADStatus *status) +{ + BOOL enabled; + s32 chan; + u32 data[2]; + u32 chanBit; + u32 sr; + int chanShift; + u32 motor; + + enabled = OSDisableInterrupts(); + + motor = 0; + for (chan = 0; chan < SI_MAX_CHAN; chan++, status++) { + chanBit = PAD_CHAN0_BIT >> chan; + chanShift = 8 * (SI_MAX_CHAN - 1 - chan); + + if (PendingBits & chanBit) { + PADReset(0); + status->err = PAD_ERR_NOT_READY; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if ((ResettingBits & chanBit) || ResettingChan == chan) { + status->err = PAD_ERR_NOT_READY; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (!(EnabledBits & chanBit)) { + status->err = (s8)PAD_ERR_NO_CONTROLLER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (SIIsChanBusy(chan)) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + sr = SIGetStatus(chan); + if (sr & SI_ERROR_NO_RESPONSE) { + SIGetResponse(chan, data); + + if (WaitingBits & chanBit) { + status->err = (s8)PAD_ERR_NONE; + memset(status, 0, offsetof(PADStatus, err)); + + if (!(CheckingBits & chanBit)) { + CheckingBits |= chanBit; + SIGetTypeAsync(chan, PADReceiveCheckCallback); + } + continue; + } + + PADDisable(chan); + + status->err = (s8)PAD_ERR_NO_CONTROLLER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (!(SIGetType(chan) & SI_GC_NOMOTOR)) { + motor |= chanBit; + } + + if 
(!SIGetResponse(chan, data)) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + if (data[0] & 0x80000000) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + continue; + } + + MakeStatus(chan, status, data); + + // Check and clear PAD_ORIGIN bit + if (status->button & 0x2000) { + status->err = PAD_ERR_TRANSFER; + memset(status, 0, offsetof(PADStatus, err)); + + // Get origin. It is okay if the following transfer fails + // since the PAD_ORIGIN bit remains until the read origin + // command completes. + SITransfer(chan, &CmdReadOrigin, 1, &Origin[chan], 10, PADOriginUpdateCallback, 0); + continue; + } + + status->err = PAD_ERR_NONE; + + // Clear PAD_INTERFERE bit + status->button &= ~0x0080; + } + + OSRestoreInterrupts(enabled); + return motor; +} + /* Sends one motor command per channel, taken from commandArray[0..SI_MAX_CHAN-1], with interrupts disabled. Only channels that are enabled and whose SI type lacks SI_GC_NOMOTOR are touched. PAD_MOTOR_STOP_HARD is replaced by PAD_MOTOR_STOP when Spec is below PAD_SPEC_2. The SI command word is (0x40 << 16) combined with AnalogMode and the low two bits of the command; SITransferCommands() is called once, and only if at least one command was set. */ +void PADControlAllMotors(const u32 *commandArray) +{ + BOOL enabled; + int chan; + u32 command; + BOOL commit; + u32 chanBit; + + enabled = OSDisableInterrupts(); + commit = FALSE; + for (chan = 0; chan < SI_MAX_CHAN; chan++, commandArray++) { + chanBit = PAD_CHAN0_BIT >> chan; + if ((EnabledBits & chanBit) && !(SIGetType(chan) & SI_GC_NOMOTOR)) { + command = *commandArray; + if (Spec < PAD_SPEC_2 && command == PAD_MOTOR_STOP_HARD) { + command = PAD_MOTOR_STOP; + } + + SISetCommand(chan, (0x40 << 16) | AnalogMode | (command & (0x00000001 | 0x00000002))); + commit = TRUE; + } + } + if (commit) { + SITransferCommands(); + } + OSRestoreInterrupts(enabled); +} + /* Single-channel form of PADControlAllMotors: applies the same enabled and SI_GC_NOMOTOR checks and the same PAD_MOTOR_STOP_HARD substitution to chan, then sets the command and transfers it immediately. */ +void PADControlMotor(s32 chan, u32 command) +{ + BOOL enabled; + u32 chanBit; + + enabled = OSDisableInterrupts(); + chanBit = PAD_CHAN0_BIT >> chan; + if ((EnabledBits & chanBit) && !(SIGetType(chan) & SI_GC_NOMOTOR)) { + if (Spec < PAD_SPEC_2 && command == PAD_MOTOR_STOP_HARD) { + command = PAD_MOTOR_STOP; + } + + SISetCommand(chan, (0x40 << 16) | AnalogMode | (command & (0x00000001 | 0x00000002))); + SITransferCommands(); + } + OSRestoreInterrupts(enabled); +} + +void 
PADSetSpec(u32 spec) /* Selects the response decoder for spec: SPEC0_MakeStatus for PAD_SPEC_0, SPEC1_MakeStatus for PAD_SPEC_1, SPEC2_MakeStatus for PAD_SPEC_2 through PAD_SPEC_5; any other value leaves MakeStatus unchanged. Also clears __PADSpec and stores spec in Spec. */ +{ + __PADSpec = 0; + switch (spec) { + case PAD_SPEC_0: + MakeStatus = SPEC0_MakeStatus; + break; + case PAD_SPEC_1: + MakeStatus = SPEC1_MakeStatus; + break; + case PAD_SPEC_2: + case PAD_SPEC_3: + case PAD_SPEC_4: + case PAD_SPEC_5: + MakeStatus = SPEC2_MakeStatus; + break; + } + Spec = spec; +} + /* Returns the value last stored in Spec. */ +u32 PADGetSpec(void) +{ + return Spec; +} + /* Decodes a raw two-word response into status using the SPEC0 layout. Buttons A, B, X, Y and START come from bits 0x0008, 0x0020, 0x0100, 0x0001 and 0x0010 of the upper half of data[0]; the triggers are the two low bytes of data[0]; the sticks are the four bytes of data[1]. analogA and analogB are set to 0, PAD_TRIGGER_L and PAD_TRIGGER_R are added when the trigger value is at least 170, and 128 is subtracted from every stick axis. chan is not used. */ +static void SPEC0_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ + status->button = 0; + status->button |= ((data[0] >> 16) & 0x0008) ? PAD_BUTTON_A : 0; + status->button |= ((data[0] >> 16) & 0x0020) ? PAD_BUTTON_B : 0; + status->button |= ((data[0] >> 16) & 0x0100) ? PAD_BUTTON_X : 0; + status->button |= ((data[0] >> 16) & 0x0001) ? PAD_BUTTON_Y : 0; + status->button |= ((data[0] >> 16) & 0x0010) ? PAD_BUTTON_START : 0; + status->stickX = (s8)(data[1] >> 16); + status->stickY = (s8)(data[1] >> 24); + status->substickX = (s8)(data[1]); + status->substickY = (s8)(data[1] >> 8); + status->triggerL = (u8)(data[0] >> 8); + status->triggerR = (u8)data[0]; + status->analogA = 0; + status->analogB = 0; + if (170 <= status->triggerL) { + status->button |= PAD_TRIGGER_L; + } + if (170 <= status->triggerR) { + status->button |= PAD_TRIGGER_R; + } + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; +} + /* Same decoding as SPEC0_MakeStatus except for the button bit positions: A, B, X, Y and START come from bits 0x0080, 0x0100, 0x0020, 0x0010 and 0x0200 of the upper half of data[0]. chan is not used. */ +static void SPEC1_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ + + status->button = 0; + status->button |= ((data[0] >> 16) & 0x0080) ? PAD_BUTTON_A : 0; + status->button |= ((data[0] >> 16) & 0x0100) ? PAD_BUTTON_B : 0; + status->button |= ((data[0] >> 16) & 0x0020) ? PAD_BUTTON_X : 0; + status->button |= ((data[0] >> 16) & 0x0010) ? PAD_BUTTON_Y : 0; + status->button |= ((data[0] >> 16) & 0x0200) ? 
PAD_BUTTON_START : 0; + + status->stickX = (s8)(data[1] >> 16); + status->stickY = (s8)(data[1] >> 24); + status->substickX = (s8)(data[1]); + status->substickY = (s8)(data[1] >> 8); + + status->triggerL = (u8)(data[0] >> 8); + status->triggerR = (u8)data[0]; + + status->analogA = 0; + status->analogB = 0; + + if (170 <= status->triggerL) { + status->button |= PAD_TRIGGER_L; + } + if (170 <= status->triggerR) { + status->button |= PAD_TRIGGER_R; + } + + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; +} + /* Returns var minus org. Before subtracting, var is raised to -128 + org when org is positive, or lowered to 127 + org when org is negative, so the difference stays within the s8 range. */ +static s8 ClampS8(s8 var, s8 org) +{ + if (0 < org) { + s8 min = (s8)(-128 + org); + if (var < min) { + var = min; + } + } + else if (org < 0) { + s8 max = (s8)(127 + org); + if (max < var) { + var = max; + } + } + return var -= org; +} + /* Returns var minus org, with var first raised to org so the result never goes below 0. */ +static u8 ClampU8(u8 var, u8 org) +{ + if (var < org) { + var = org; + } + return var -= org; +} + /* Mask of the button bits that SPEC2_MakeStatus keeps from the raw response. Bits 0x2000 and 0x0080 are the ones PADRead's comments call PAD_ORIGIN and PAD_INTERFERE. */ +#define PAD_ALL \ + (PAD_BUTTON_LEFT | PAD_BUTTON_RIGHT | PAD_BUTTON_DOWN | PAD_BUTTON_UP | PAD_TRIGGER_Z | PAD_TRIGGER_R | PAD_TRIGGER_L | PAD_BUTTON_A \ + | PAD_BUTTON_B | PAD_BUTTON_X | PAD_BUTTON_Y | PAD_BUTTON_MENU | 0x2000 | 0x0080) + /* Decodes a raw two-word response into status using the SPEC2 layout. Buttons are the upper half of data[0] masked with PAD_ALL and the main stick is its two low bytes. How data[1] is split between substick, triggers and analogA/analogB depends on bits 0x700 of AnalogMode; fields packed as 4 bits are shifted up into the high nibble. Afterwards 128 is subtracted from every stick axis, and the stick and trigger values are made relative to Origin[chan] through ClampS8 and ClampU8. */ +static void SPEC2_MakeStatus(s32 chan, PADStatus *status, u32 data[2]) +{ + PADStatus *origin; + + status->button = (u16)((data[0] >> 16) & PAD_ALL); + status->stickX = (s8)(data[0] >> 8); + status->stickY = (s8)(data[0]); + + switch (AnalogMode & 0x00000700) { + case 0x00000000: + case 0x00000500: + case 0x00000600: + case 0x00000700: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = (u8)(((data[1] >> 12) & 0x0f) << 4); + status->triggerR = (u8)(((data[1] >> 8) & 0x0f) << 4); + status->analogA = (u8)(((data[1] >> 4) & 0x0f) << 4); + status->analogB = (u8)(((data[1] >> 0) & 0x0f) << 4); + break; + case 0x00000100: + status->substickX = (s8)(((data[1] >> 28) & 0x0f) << 4); + status->substickY = (s8)(((data[1] >> 24) & 0x0f) << 4); + status->triggerL = (u8)(data[1] >> 16); + 
status->triggerR = (u8)(data[1] >> 8); + status->analogA = (u8)(((data[1] >> 4) & 0x0f) << 4); + status->analogB = (u8)(((data[1] >> 0) & 0x0f) << 4); + break; + case 0x00000200: + status->substickX = (s8)(((data[1] >> 28) & 0x0f) << 4); + status->substickY = (s8)(((data[1] >> 24) & 0x0f) << 4); + status->triggerL = (u8)(((data[1] >> 20) & 0x0f) << 4); + status->triggerR = (u8)(((data[1] >> 16) & 0x0f) << 4); + status->analogA = (u8)(data[1] >> 8); + status->analogB = (u8)(data[1] >> 0); + break; + case 0x00000300: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = (u8)(data[1] >> 8); + status->triggerR = (u8)(data[1] >> 0); + status->analogA = 0; + status->analogB = 0; + break; + case 0x00000400: + status->substickX = (s8)(data[1] >> 24); + status->substickY = (s8)(data[1] >> 16); + status->triggerL = 0; + status->triggerR = 0; + status->analogA = (u8)(data[1] >> 8); + status->analogB = (u8)(data[1] >> 0); + break; + } + + status->stickX -= 128; + status->stickY -= 128; + status->substickX -= 128; + status->substickY -= 128; + + origin = &Origin[chan]; + status->stickX = ClampS8(status->stickX, origin->stickX); + status->stickY = ClampS8(status->stickY, origin->stickY); + status->substickX = ClampS8(status->substickX, origin->substickX); + status->substickY = ClampS8(status->substickY, origin->substickY); + status->triggerL = ClampU8(status->triggerL, origin->triggerL); + status->triggerR = ClampU8(status->triggerR, origin->triggerR); +} + /* Always stores SIGetType(chan) in *type. Returns FALSE when the channel is queued for reset, is the channel currently being reset, or is not enabled; TRUE otherwise. */ +BOOL PADGetType(s32 chan, u32 *type) +{ + u32 chanBit; + + *type = SIGetType(chan); + chanBit = PAD_CHAN0_BIT >> chan; + if ((ResettingBits & chanBit) || ResettingChan == chan || !(EnabledBits & chanBit)) { + return FALSE; + } + return TRUE; +} + /* Returns non-zero only when ResettingBits is empty, ResettingChan is 32 and SIBusy() reports idle. */ +BOOL PADSync(void) +{ + return ResettingBits == 0 && ResettingChan == 32 && !SIBusy(); +} + /* Stores mode shifted left by 8 in AnalogMode, then, with interrupts disabled, removes every currently enabled channel from EnabledBits, WaitingBits and CheckingBits and stops SI polling for them. */ +void PADSetAnalogMode(u32 mode) +{ + BOOL enabled; + u32 mask; + + enabled = OSDisableInterrupts(); + AnalogMode = mode << 8; + mask = 
EnabledBits; + + EnabledBits &= ~mask; + WaitingBits &= ~mask; + CheckingBits &= ~mask; + + SIDisablePolling(mask); + OSRestoreInterrupts(enabled); +} + /* Reset hook; NOTE(review): presumably reached through ResetFunctionInfo, which PADInit registers - confirm. Removes any installed sampling callback first. When f is FALSE: the first call that finds PADSync() true starts PADRecalibrate on all four channels, records its result in the static flag and returns FALSE; other calls return the PADSync() result. When f is TRUE: clears the static flag and returns TRUE. */ +static BOOL OnReset(BOOL f) +{ + static BOOL recalibrated = FALSE; + BOOL sync; + + if (SamplingCallback) { + PADSetSamplingCallback(NULL); + } + + if (!f) { + sync = PADSync(); + if (!recalibrated && sync) { + recalibrated = PADRecalibrate(PAD_CHAN0_BIT | PAD_CHAN1_BIT | PAD_CHAN2_BIT | PAD_CHAN3_BIT); + return FALSE; + } + return sync; + } + else { + recalibrated = FALSE; + } + + return TRUE; +} + /* Clears XPatchBits. */ +void __PADDisableXPatch(void) +{ + XPatchBits = 0; +} + /* Polling handler registered by PADSetSamplingCallback. When a SamplingCallback is installed, it is invoked with a freshly cleared local OSContext set as the current context; afterwards that local context is cleared again and the incoming context is made current. interrupt is not used. */ +static void SamplingHandler(__OSInterrupt interrupt, OSContext *context) +{ + OSContext exceptionContext; + + if (SamplingCallback) { + OSClearContext(&exceptionContext); + OSSetCurrentContext(&exceptionContext); + SamplingCallback(); + OSClearContext(&exceptionContext); + OSSetCurrentContext(context); + } +} + /* Installs callback as SamplingCallback and returns the previous one. A non-NULL callback registers SamplingHandler as an SI polling handler; NULL unregisters it. */ +PADSamplingCallback PADSetSamplingCallback(PADSamplingCallback callback) +{ + PADSamplingCallback prev; + + prev = SamplingCallback; + SamplingCallback = callback; + if (callback) { + SIRegisterPollingHandler(SamplingHandler); + } + else { + SIUnregisterPollingHandler(SamplingHandler); + } + return prev; +} + /* With interrupts disabled, sets bit 0x40 of UnkVal when disable is non-zero and clears it otherwise. Returns TRUE if the bit was set before the call, FALSE if not. PADRecalibrate skips marking RecalibrateBits while this bit is set. */ +BOOL __PADDisableRecalibration(BOOL disable) +{ + BOOL enabled; + BOOL prev; + + enabled = OSDisableInterrupts(); + prev = (UnkVal & 0x40) ? 
TRUE : FALSE; + UnkVal &= (u8)~0x40; + if (disable) { + UnkVal |= 0x40; + } + OSRestoreInterrupts(enabled); + return prev; +} diff --git a/src/dolphin/pad/Padclamp.c b/src/dolphin/pad/Padclamp.c new file mode 100644 index 00000000..a96d75c4 --- /dev/null +++ b/src/dolphin/pad/Padclamp.c @@ -0,0 +1,119 @@ +#include + +#include + /* Clamp parameters: a minimum and maximum for the triggers, and a minimum, maximum and xy value for each of the two sticks. The parameter names of ClampStick and ClampTrigger line up with these fields. */ +typedef struct PADClampRegion { + u8 minTrigger; + u8 maxTrigger; + s8 minStick; + s8 maxStick; + s8 xyStick; + s8 minSubstick; + s8 maxSubstick; + s8 xySubstick; +} PADClampRegion; + /* Clamp values in field order: triggers 30 to 180; left stick min 15, max 72, xy 40; right stick min 15, max 59, xy 31. */ +static PADClampRegion ClampRegion = { + // Triggers + 30, + 180, + + // Left stick + 15, + 72, + 40, + + // Right stick + 15, + 59, + 31, +}; + /* Clamps the stick position (*px, *py) in place. Works on magnitudes: records the sign of each axis, treats any magnitude of min or less as 0 and reduces larger ones by min, and writes (0, 0) when both end up 0. Otherwise d is computed as xy times the larger magnitude plus (max - xy) times the smaller one; when d exceeds xy * max both magnitudes are scaled by xy * max / d. The signs are reapplied on output. */ +static void ClampStick(s8 *px, s8 *py, s8 max, s8 xy, s8 min) +{ + int x = *px; + int y = *py; + int signX; + int signY; + int d; + + if (0 <= x) { + signX = 1; + } + else { + signX = -1; + x = -x; + } + + if (0 <= y) { + signY = 1; + } + else { + signY = -1; + y = -y; + } + + if (x <= min) { + x = 0; + } + else { + x -= min; + } + if (y <= min) { + y = 0; + } + else { + y -= min; + } + + if (x == 0 && y == 0) { + *px = *py = 0; + return; + } + + if (xy * y <= xy * x) { + d = xy * x + (max - xy) * y; + if (xy * max < d) { + x = (s8)(xy * max * x / d); + y = (s8)(xy * max * y / d); + } + } + else { + d = xy * y + (max - xy) * x; + if (xy * max < d) { + x = (s8)(xy * max * x / d); + y = (s8)(xy * max * y / d); + } + } + + *px = (s8)(signX * x); + *py = (s8)(signY * y); +} + /* Clamps *trigger in place: a value of min or less becomes 0; any other value is capped at max and then reduced by min. */ +static void ClampTrigger(u8 *trigger, u8 min, u8 max) +{ + if (*trigger <= min) { + *trigger = 0; + } + else { + if (max < *trigger) { + *trigger = max; + } + *trigger -= min; + } +} + /* NOTE: the whole body below is commented out, so this function currently does nothing and leaves status untouched. The commented-out code would apply ClampStick and ClampTrigger with the ClampRegion values to every entry whose err is PAD_ERR_NONE. */ +void PADClamp(PADStatus *status) +{ + // int i; + // for (i = 0; i < PAD_CHANMAX; i++, status++) { + // if (status->err != PAD_ERR_NONE) { + // continue; + // } + + // ClampStick(&status->stickX, &status->stickY, ClampRegion.maxStick, ClampRegion.xyStick, ClampRegion.minStick); + // ClampStick(&status->substickX, &status->substickY, ClampRegion.maxSubstick, 
ClampRegion.xySubstick, ClampRegion.minSubstick); + // ClampTrigger(&status->triggerL, ClampRegion.minTrigger, ClampRegion.maxTrigger); + // ClampTrigger(&status->triggerR, ClampRegion.minTrigger, ClampRegion.maxTrigger); + // } +}