;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; AccuracyCoin ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This ROM is a collection of accuracy tests on an NROM cartridge. ; NOTE: While most of these tests are universal to all revisions of the NES board, CPU, and PPU, there are a handful of tests that are not going to pass on all revisions. ; To be more specific, these tests were designed for an RP2A03G APU/CPU, and an RP2C02G PPU. ; Additionally, if you run this ROM on your console with a flash cart, you might fail some tests. Notably, the open bus tests fail on an Everdrive N8 Pro. ; If you are looking for a specific test, consider CTRL + F searching for "TestPages:", as that's where the list of tests is. ; The format for the tests as they are stored in the ROM is: ; table "Name of test", $FF, Address_To_Store_Test_Results, Address_To_Jump_To_In_Order_To_Run_The_Test ; so to easily find the code for a test, you can search for the "Address_To_Jump_To_In_Order_To_Run_The_Test:" routine for a given test. ; NOTE: The NMI and IRQ vectors both point to RAM. This allows me to create tests that have different NMI/IRQ routines. 
;;;; HEADER AND COMPILER STUFF ;;;;
	.inesprg 2	; 2 banks
	.ineschr 1	;
	.inesmap 0	; mapper 0 = NROM
	.inesmir 0	; background mirroring, horizontal

;;;; CONSTANTS ;;;;
; Every value below is identical to the original definition; only the literal
; width (2 hex digits for zero page, 4 for everything else) and the layout changed.

; Bit masks for the CPU status flags.
flag_c = $01
flag_z = $02
flag_i = $04
flag_d = $08
flag_v = $40
flag_n = $80

; Zero page: scratch bytes and menu/engine state.
byte0                     = $00
byte1                     = $01
byte2                     = $02
byte3                     = $03
suitePointer              = $05
dontSetPointer            = $07
byte8                     = $08
byte9                     = $09
byteF                     = $0F
ErrorCode                 = $10
initialSubTest            = $11
result_DMADMASync_PreTest = $12
menuTabXPos               = $14
menuCursorXPos            = $15
menuCursorYPos            = $16
menuHeight                = $17
controller                = $18
controller_New            = $19
JSRFromRAM                = $1A
JSRFromRAM1               = $1B
JSRFromRAM2               = $1C
JSRFromRAM3               = $1D
TestResultPointer         = $1E

; Zero page: Test_UnOp_* variables.
Test_UnOp_OperandTargetAddrLo   = $20
Test_UnOp_OperandTargetAddrHi   = $21
Test_UnOp_ValueAtAddressForTest = $22
Test_UnOp_A                     = $23
Test_UnOp_X                     = $24
Test_UnOp_Y                     = $25
Test_UnOp_FlagsInit             = $26
Test_UnOp_SP                    = $27
Test_UnOp_ExpectedResultAddrLo  = $28
Test_UnOp_ExpectedResultAddrHi  = $29
Test_UnOp_ValueAtAddressResult  = $2A
Test_UnOp_CMP                   = $2B
Test_UnOp_CPX                   = $2C
Test_UnOp_CPY                   = $2D
Test_UnOp_CM_Flags              = $2E
Test_UnOp_CPS                   = $2F
Test_UnOp_IndirectPointerLo     = $30
Test_UnOp_IndirectPointerHi     = $31
Test_UnOp_CycleDelayPostDMA     = $32

; Zero page: menu / "run all tests" state.
HighlightTextPrinted         = $33
AutomateTestSuite            = $34
RunningAllTests              = $35
PostAllTestScreen            = $36
PostAllTestTally             = $37
PostAllPassTally             = $38
PrintDecimalTensCheck        = $39
result_VblankSync_PreTest    = $3A
DebugMode                    = $3B
IncorrectReturnAddressOffset = $3C
AllTestMenuTestNameOffsetLo  = $3D
AllTestMenuTestNameOffsetHi  = $3E

Reserverd_41 = $41 ; Used in the Implied Dummy Reads. It's probably best we never actually use this.

PostDMACyclesUntilTestInstruction = 13 ; (a plain decimal value, unlike the addresses around it)

Test_ZeroPageReserved  = $50 ; through $5F
Test_ZeroPageReserved2 = $60 ; through $6F (rarely used, but let's still avoid putting engine stuff here.)
TESTHighlightTextCopy  = $7A
suiteAttributeCopy     = $7E
suitePointerList       = $80
suiteExecPointerList   = $A0

Reserverd_C1 = $C1 ; Used in the Implied Dummy Reads. It's probably best we never actually use this.
Reserved_C8  = $C8 ; For my "unofficial opcodes are correct length" tests, I use [Two-Byte-Opcode][INY], and then check the value of Y. Since INY is $C8, I'd like to avoid corrupting something stored in byte C8.
Reserverd_E1 = $E1 ; Used in the Implied Dummy Reads. It's probably best we never actually use this.
Debug_EC     = $EC ; This is used to see how far an emulator gets before hanging when loading the main menu.

Copy_X2 = $ED ; These are exclusively used to keep registers from before RunTest from being modified during a test, so they can be restored after the test.
Copy_Y2 = $EE ; ^
Copy_A2 = $EF ; ^

PPUCTRL_COPY = $F0
PPUMASK_COPY = $F1
Copy_SP      = $FA
Copy_SP2     = $FB
Copy_Flags   = $FC
Copy_X       = $FD
Copy_Y       = $FE
Copy_A       = $FF

; Page 3: values captured at power on.
PowerOnRAM           = $0300
PowerOnVRAM          = $0320
PowerOnPalette       = $0340
PowerOnTest_PPUReset = $0360 ; 1 byte. Pass/fail the PPU Reset flag test.
PowerOn_A            = $0370
PowerOn_X            = $0371
PowerOn_Y            = $0372
PowerOn_SP           = $0373
PowerOn_P            = $0374
PowerOn_MagicNumber  = $03F0

;$400 to $4FF is where I store the results of the tests.
result_Unimplemented   = $0400
result_CPUInstr        = $0401
result_UnofficialInstr = $0402
result_RAMMirror       = $0403
result_PPURegMirror    = $0404
result_ROMnotWritable  = $0405
result_DummyReads      = $0406
result_DummyWrites     = $0407
result_OpenBus         = $0408
result_UnOp_SLO_03     = $0409
result_UnOp_SLO_07     = $040A
result_UnOp_SLO_0F     = $040B
result_UnOp_SLO_13     = $040C
result_UnOp_SLO_17     = $040D
result_UnOp_SLO_1B     = $040E
result_UnOp_SLO_1F     = $040F
result_UnOp_ANC_0B     = $0410
result_UnOp_ANC_2B     = $0411
result_UnOp_ASR_4B     = $0412
result_UnOp_ARR_6B     = $0413
result_UnOp_ANE_8B     = $0414
result_UnOp_LXA_AB     = $0415
result_UnOp_AXS_CB     = $0416
result_UnOp_SBC_EB     = $0417
result_UnOp_Magic      = $03FB ; page 3 omits the test from the all-test-result-table.
; Test result addresses, continued ($0419 - $045C).
; Every value is identical to the original definition; only the literal width and layout changed.
result_UnOp_RLA_23               = $0419
result_UnOp_RLA_27               = $041A
result_UnOp_RLA_2F               = $041B
result_UnOp_RLA_33               = $041C
result_UnOp_RLA_37               = $041D
result_UnOp_RLA_3B               = $041E
result_UnOp_RLA_3F               = $041F
result_UnOp_SRE_43               = $0420
result_UnOp_SRE_47               = $047F ; It's pretty funny, but I need address $421 to always be $00. (see Implied Dummy Reads where bit 5 of the opcode is set.)
result_UnOp_SRE_4F               = $0422
result_UnOp_SRE_53               = $0423
result_UnOp_SRE_57               = $0424
result_UnOp_SRE_5B               = $0425
result_UnOp_SRE_5F               = $0426
result_UnOp_RRA_63               = $0427
result_UnOp_RRA_67               = $0428
result_UnOp_RRA_6F               = $0429
result_UnOp_RRA_73               = $042A
result_UnOp_RRA_77               = $042B
result_UnOp_RRA_7B               = $042C
result_UnOp_RRA_7F               = $042D
result_UnOp_SAX_83               = $042E
result_UnOp_SAX_87               = $042F
result_UnOp_SAX_8F               = $0430
result_UnOp_SAX_97               = $0431
result_UnOp_LAX_A3               = $0432
result_UnOp_LAX_A7               = $0433
result_UnOp_LAX_AF               = $0434
result_UnOp_LAX_B3               = $0435
result_UnOp_LAX_B7               = $0436
result_UnOp_LAX_BF               = $0437
result_UnOp_DCP_C3               = $0438
result_UnOp_DCP_C7               = $0439
result_UnOp_DCP_CF               = $043A
result_UnOp_DCP_D3               = $043B
result_UnOp_DCP_D7               = $043C
result_UnOp_DCP_DB               = $043D
result_UnOp_DCP_DF               = $043E
result_UnOp_ISC_E3               = $043F
result_UnOp_ISC_E7               = $0440
result_UnOp_ISC_EF               = $0441
result_UnOp_ISC_F3               = $0442
result_UnOp_ISC_F7               = $0443
result_UnOp_ISC_FB               = $0444
result_UnOp_ISC_FF               = $0445
result_UnOp_SHA_93               = $0446
result_UnOp_SHA_9F               = $0447
result_UnOp_SHS_9B               = $0448
result_UnOp_SHY_9C               = $0449
result_UnOp_SHX_9E               = $044A
result_UnOp_LAE_BB               = $044B
result_DMA_Plus_2007R            = $044C
result_ProgramCounter_Wraparound = $044D
result_PPUOpenBus                = $044E
result_DMA_Plus_2007W            = $044F
result_VBlank_Beginning          = $0450
result_VBlank_End                = $0451
result_NMI_Control               = $0452
result_NMI_Timing                = $0453
result_NMI_Suppression           = $0454
result_NMI_VBL_End               = $0455
result_NMI_Disabled_VBL_Start    = $0456
result_Sprite0Hit_Behavior       = $0457
result_ArbitrarySpriteZero       = $0458
result_SprOverflow_Behavior      = $0459
result_MisalignedOAM_Behavior    = $045A
result_Address2004_Behavior      = $045B
result_APURegActivation          = $045C
; Test result addresses, continued ($045D - $0487), followed by the page-3 results.
result_DMA_Plus_4015R         = $045D
result_DMA_Plus_4016R         = $045E
result_ControllerStrobing     = $045F
result_InstructionTiming      = $0460
result_IFlagLatency           = $0461
result_NmiAndBrk              = $0462
result_NmiAndIrq              = $0463
result_RMW2007                = $0464
result_APULengthCounter       = $0465
result_APULengthTable         = $0466
result_FrameCounterIRQ        = $0467
result_FrameCounter4Step      = $0468
result_FrameCounter5Step      = $0469
result_DeltaModulationChannel = $046A
result_DMABusConflict         = $046B
result_DMA_Plus_OpenBus       = $046C
result_ImpliedDummyRead       = $046D
result_AddrMode_AbsIndex      = $046E
result_AddrMode_ZPgIndex      = $046F
result_AddrMode_Indirect      = $0470
result_AddrMode_IndIndeX      = $0471
result_AddrMode_IndIndeY      = $0472
result_AddrMode_Relative      = $0473
result_DecimalFlag            = $0474
result_BFlag                  = $0475
result_PPUReadBuffer          = $0476
result_DMCDMAPlusOAMDMA       = $0477
result_ImplicitDMAAbort       = $0478
result_ExplicitDMAAbort       = $0479
result_ControllerClocking     = $047A
result_OAM_Corruption         = $047B
result_JSREdgeCases           = $047C
result_AllNOPs                = $047D
result_PaletteRAMQuirks       = $047E
; $047F is used (by result_UnOp_SRE_47). If you add a new test, don't forget to skip that value.
result_INC4014                = $0480
result_AttributesAsTiles      = $0481
result_tRegisterQuirks        = $0482
result_StaleBGShiftRegisters  = $0483
result_Scanline0Sprites       = $0484
result_CHRROMIsNotWritable    = $0485
result_RenderingFlagBehavior  = $0486
result_BGSerialIn             = $0487

; Results stored in page 3: page 3 omits the test from the all-test-result-table.
result_PowOn_CPURAM           = $03FC
result_PowOn_CPUReg           = $03FD
result_PowOn_PPURAM           = $03FE
result_PowOn_PPUPal           = $03FF
; This was $03FD, the same byte as result_PowOn_CPUReg, so the two tests overwrote each
; other's result. $03FA is unused by every definition in this file's constant list
; (result_UnOp_Magic is $03FB, PowerOn_MagicNumber is $03F0).
; NOTE(review): ReloadMainMenu zeroes the other page-3 result bytes (result_UnOp_Magic and the
; four result_PowOn_* above); confirm this slot is initialized the same way, and that no code
; addresses $03FA by literal.
result_PowOn_PPUReset         = $03FA

;$500 is dedicated to RAM needed for tests.
;$600 is dedicated to the IRQ routine
;$700 is dedicated to the NMI routine.
;;;; ASSEMBLY CODE ;;;;
	.org $8000
	; The open bus test needs to make sure an inaccurate emulation of open bus will fall into test code, so this function here is a fail condition of the open bus test.
	; It must stay the very first thing at $8000.
OpenBusTestFakedOpenBusBehavior:
	NOP ; An incorrect implementation of open bus might execute all the way to here from address $5000.
	NOP ; The two NOPS are for alignment, and this BRK takes the PC to some "test failed" handler.
	BRK ; Pushes 3 bytes to the stack, and moves the PC to the address determined by $FFFE, which is address $0600.

CannotWriteToROM_01:
	.byte $01; This value is used in the "Cannot write to ROM" test.

;;;;;;;
; RESET: entry point at power on / reset.
; On a cold boot (PowerOn_MagicNumber != $5A) it records A, X, Y, SP and the flags exactly as the CPU
; presented them, then runs the PPU reset flag test and falls through to PostResetFlagTest and ReloadMainMenu.
; On a warm boot (PowerOn_MagicNumber == $5A) the recording and the PPU reset flag check are skipped.
; Clobbers zero page $00 and $01 as temporaries.
RESET:
	; This ROM, despite the guidance of the NesDev Wiki's "startup code", writes a bunch of uninitialized registers, and reads uninitialized RAM. Intentionally.
	STA <$00 ; First thing we do at power on is store A to address $00 as a temporary place to hold it. This does not modify the CPU flags.
	PHP ; Push the processor flags...
	PLA ; ... and pull them off.
	AND #$CF ; Remove the B flag, and other garbage flag. (bits 4 and 5)
	STA <$01 ; And store this somewhere temporary.
	LDA PowerOn_MagicNumber ; Before we store these to the "test results", let's verify this is a cold boot and not a warm boot.
	CMP #$5A ; Assume a cold boot won't have this value here.
	BEQ RESET_SkipPowerOnTests ; If the value was $5A, skip storing stuff to RAM.
	LDA <$00 ; Okay cool, it's a cold boot. Let's start storing some stuff. Copy the value we set aside...
	STA PowerOn_A ; And store the value for use in TEST_PowerOnState_CPU_Registers
	STY PowerOn_Y ; Ditto for the Y register.
	STX PowerOn_X ; And the X register. (X must be saved before the TSX below overwrites it.)
	TSX ; Let's fetch the stack pointer...
	STX PowerOn_SP ; And store it for that test.
	LDA <$01 ; We stored the flags here, so let's copy these...
	STA PowerOn_P ; And paste it in RAM.
RESET_SkipPowerOnTests:
	SEI ; Time for some regular power on code. Fun fact: The I flag is already set at power on and when hitting reset. The CPU just does that. So this line isn't needed.
	CLD ; Disable the Decimal Flag. Who knows, maybe you hit reset in the middle of the Decimal Flag test.
	LDX #$EF ; Due to some tests modifying the stack pointer, it's convenient to put it at EF instead of FF.
	TXS ; This prevents some tests where the resulting stack pointer is 00 from pushing data, and overwriting the bottom of the stack.
	LDA #$40
	STA $4017; Disable the APU Frame Counter IRQ.

TEST_PPUResetFlag:
	; All throughout this ROM, you will see me label the various tests like so:
	; 3 semicolons, the error code that will appear if the test fails here, the name of the test in square brackets, then a description of what is being tested.
	; Here's an example:
	;;; Test 1 [PPU Reset Flag]: Are PPU Registers writable before the first pre-render line? ;;;
	; They shouldn't be, as that's the job of the PPU Reset Flag!
	; Let's see if the PPU Reset flag exists.
	LDA #$27
	STA $2006 ; "magic address" (Writing to $2006 twice will update the 'v' register of the PPU)
	LDA #$BF
	STA $2006 ; 'v' = $27BF
	LDA #$5A ; "magic number". All over this ROM, you will frequently see me using the value $5A for tests. That's 01011010 in binary, and I just assume that if something goes wrong, it won't stumble on that number by random chance.
	STA $2007 ; Okay, I'll be back in 2 frames to check on you...
	LDX #$FF ; We're going to stall for VBlank, increment X, then X=0, so we're going to stall until next VBlank yet again.
	LDA $2002
VblLoop:
	LDA $2002 ; This is PPU_STATUS. Bit 7 tells us if the PPU is currently in VBlank or not.
	BPL VblLoop ; So if bit 7 is 0 (we are not in VBlank) the Negative flag is not set, so "Branch on Plus" will be taken.
	INX ; X++ ($FF -> $00 after the first VBlank, $00 -> $01 after the second)
	BEQ VblLoop ; If X is zero, we do this again.
	; Now that the PPU is responsive, let's copy the resting values.
	LDA PowerOn_MagicNumber ; Check again if this is a cold or a warm boot.
	CMP #$5A
	BEQ PostResetFlagTest ; If this is a warm boot, skip copying the uninitialized RAM and VRAM.
	JSR Read32NametableBytes
	JSR ReadPaletteRAM
	; Let's also see if the magic number was written to VRAM, to verify if the reset flag exists.
	; It's worth noting that in its current state, this test fails on my console. I assume this has something to do with the flash cart I'm using.
	LDA #6
	STA PowerOnTest_PPUReset ; set to FAIL (error code $1) by default. Overwrite with PASS if it passes.
	; NOTE(review): presumably the value 6 is how "fail, error code 1" is encoded in a result byte - confirm against the result format.
	LDA #$27
	STA $2006
	LDA #$BF
	STA $2006 ; Set 'v' back to where we attempted to write our magic number.
	LDA $2007 ; load buffer
	LDA $2007 ; read buffer
	CMP #$5A
	BEQ PostResetFlagTest ; If A = $5A at this point, you fail the test since that means we wrote to VRAM before the PPU reset flag cleared. (Or uninitialized VRAM there was $5A?)
	; The value of $5A was not written to VRAM, so the reset flag does exist!
	LDA #1
	STA PowerOnTest_PPUReset ; Store a passing result here.
	; I also indicate whenever a test is over with the following comment:
	;; END OF TEST ;;

PostResetFlagTest:
	JSR DisableRendering ; With uninitialized values from VRAM and Palette RAM copied for future reference, let's overwrite the palette and nametable.
	JSR SetUpDefaultPalette
	JSR ClearRAMExceptPage3 ; Page 3 holds a copy of uninitialized RAM, VRAM, Palette RAM...
	JSR VerifyJSRBehavior
	JSR ClearNametable
	LDA #$5A
	STA PowerOn_MagicNumber ; At this point, let's write our magic number to RAM, indicating that any reset after this point is a warm boot.
	; So now if the reset button is pressed, we skip writing to the results of TEST_PowerOnState_CPU_Registers, and skip running the PPU reset flag test.
	; I guess that means you could hit the reset button at any point before this to ruin the results of those tests.
	; I'm not sure why you would do that though...

ReloadMainMenu:
	; There's an option to run every test in the ROM, and it draws a table of the results. This will run when exiting that screen with the table.
	; If your emulator fails to reach the main menu of this ROM, check the value of address $EC.
; This can help inform you of specifically where your emulator hangs. INC 01 JSR ClearPage2 ; Page 2 is used for OAM. LDA #02 STA $4014 ; Set up OAM LDA #0 STA 02 ; set up the NMI routine. JSR SetUpNMIRoutineForMainMenu LDA #0 STA $100 ; initialize the placeholder test results. (While this ROM was in an early state, I had a list of tests I wanted to make, and stored all their results at $100) ; and also initialize the "print tests" results, as these tests use page 3, which is mostly uninitialized. STA result_UnOp_Magic STA result_PowOn_CPURAM STA result_PowOn_CPUReg STA result_PowOn_PPURAM STA result_PowOn_PPUPal STA $6000 ; An incorrect open bus implementation might end up executing address $6000, so let's initialize these 3 bytes to BRKs. STA $6001 ; Though I would prefer if this was a NES 2.0 cartridge without any PRG RAM, so writing here might do nothing anyway. STA $6002 ; There's still a good chance an emulator doesn't support NES 2.0 and just puts PRG RAM here anyway. INC 04 JSR WaitForVBlank INC 05 JSR TEST_VblankSync_PreTest; ; Initialize result_VblankSync_PreTest INC 06 JSR DMASync ; Initialize result_DMADMASync_PreTest LDA #$FF STA 07 JSR LoadSuiteMenu ; Determine all the tests on the current page, and store the pointers in RAM. INC 08 JSR DrawPageNumber ; Draw the correct page number at the top of the screen. INC 09 JSR WaitForVBlank ; Stall until the PPU is in VBlank. INC 0A JSR ResetScroll ; Set the ppu 'v' and 't' registers to $2000, and reset the fine scroll values as well. INC 0B JSR EnableRendering_BG; Enable rendering the background. (We don't need sprites here.) INC 0C JSR EnableNMI ; Enable the Non Maskable Interrupt. INC 0D ; If your emulator hangs here, you probably haven't implemented the NMI? InfiniteLoop: JMP InfiniteLoop ; This is the spinning loop while I wait for the NMI to occur. ;;;;;;;;;;;;;;;;;;;; VerifyJSRBehavior: ; Let's also verify that JSR is pushing the correct values to the stack. 
; A handful of my subroutines pull off the values pushed by JSR, and use them to read data stored next to the JSR instruction. ; I need my code to still be able to load the menu even if the JSR return addresses are wrong. ; To verify this, I'll just put a JSR at address $0000, and jump there. LDA #$20 ; JSR STA <$00 LDA #Low(VerifyReturnAddressesAreCorrect) STA <$01 LDA #High(VerifyReturnAddressesAreCorrect) STA <$02 LDA #$60 ; RTS STA <$03 INC 03 JSR $0000 ; Verify return addresses pushed by JSR are correct. LDA #$20 STA > 1 = $66 + Carry set ; ROR <$66 ; $22 >> 1 = $91 ; STA ($91),Y ; A=60 -> ($15, $40) ; ; How could this one go wrong? Well, if writing to $4015 does not update the data bus, this will run RTI instead of RTS. ; We can prep for this by pushing $A0 to the stack, so if an RTI occurs, we'll return to TEST_OpenBusA0A0 LDX #0 LDY #0 LDA #$CD STA <$56 ; $56 = #$CD LDA #$22 STA <$66 ; $66 = #$22 LDA <$91 ; Push these to the stack PHA LDA <$92 ; These are important bytes for the test selection menu, so we'll restore these after the test. PHA LDA #$15 STA <$91 LDA #$40 STA <$92 ; ($91) = $4015 LDA #$A0 PHA LDA #$60 ; A = the value of RTS JSR $5600 ; Jump to open bus to run this! ; and if we made it back here, the test worked! ; restore the values from $91 and $92 PLA ; pull off the $A0 for the RTI guardrails. TEST_OpenBus_PostTest8: PLA STA <$92 PLA STA <$91 TXA ; X = 0 if we ran RTS, X = 1 if we ran RTI. CPX #$01 BEQ TEST_Fail2 INC $8F CMP #$8F BNE TEST_Fail4 LDA $0500 CMP #$84 BNE TEST_Fail4 ; SLO exists! INC $8C BCC TEST_Fail4 CMP #$8C BNE TEST_Fail4 ; ANC exists! INC $81 CMP #$81 BNE TEST_Fail4 LDA $0500 CMP #$85 BNE TEST_Fail4 ; RLA exists! INC > 1) -> $DE CMP #$DE BNE TEST_Fail4 ; SRE exists! INC > 1 = $20 CMP #$20 BNE TEST_Fail5 ; ASR exists! INC > 1) | $80*Carry = $C1 CMP #$C1 BNE TEST_Fail5 ; RRA exists! INC > 1) | $80*Carry = $A0 ; NOTE: This instruction also changes the flags in a unique way. ; I'm not testing for the flag stuff here. 
CMP #$A0 BNE TEST_Fail5 ; ARR exists! INC $1E80 ; This goes unstable, so the high byte of the target address will be changed. ; Hi = ($1E+1) & A & X; ; = $05 ; $500 = A & X & H ; = $1F & $FF & $1F ; = $1F ; H is the high byte of the target address +1. ; So we should write $1F to $0700 LDA $0700 CMP #$1F BNE TEST_Fail5 ; SHA ($zp), Y exists! INC =5, the read cycle of the LDX instruction will be after VBlank begins. X=$80, (the VBlank flag is cleared, so...) Y=$00. ; -(The bits get rearranged, and this is stored at $50 as "01") ; Put the X register into bit 1, and the Y register into bit 2. TXA ASL A LDA #0 ROL A STA <$00 TYA ASL A LDA #0 ROL A ASL A ORA <$00 ; A should now be 0000 00XY, where X and Y are bit 7 of X and Y. STA <$00 PLA TAX LDA <$00 STA <$50,X INX CPX #$07 BNE TEST_VBlank_Beginning_Loop ; Address $50 should now look exactly like TEST_VBlank_Beginning_Expected_Results LDX #0 TEST_VBlank_Beginning_Loop2: LDA <$50,X CMP TEST_VBlank_Beginning_Expected_Results,X BNE TEST_Fail9 INX CPX #$03 ; since byte 2 in this list could depend on CPU/PPU clock alignment... BNE TEST_VBlank_Beginning_Loop2_SkipByte2 ; let's ignore it. INX TEST_VBlank_Beginning_Loop2_SkipByte2: CPX #$07 BNE TEST_VBlank_Beginning_Loop2 ;; END OF TEST ;; LDA #1 RTS ;;;;;;; TEST_Fail9: JSR EnableRendering_BG JMP TEST_Fail TEST_VBlank_Beginning_Expected_Results: ; $00 is also acceptable in the fourth byte (byte 3), depending on CPU/PPU clock alignment. .byte $02, $02, $02, $02, $00, $01, $01 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; TEST_VBlank_End: ;;; Test 1 [VBLank Beginning]: Tests the timing of the $2002 VBlank flag ;;; ; Special thanks to blargg for figuring this stuff out. JSR DisableRendering LDX #0 TEST_VBlank_End_Loop: TXA JSR VblSync_Plus_A ; This next CPU cycle is synced with PPU cycle 0+A for this frame. ; VBlank ends in about 2273.333 CPU cycles. ; So let's stall for 2273-4 cycles, as this upcoming LDA takes 4 cycles. JSR Clockslide_2269 LDA $2002 ; Here's how this test works. 
; When A=0, the LDA instruction occurs before VBlank ends. ; Every iteration of this loop, this test will run 1 PPU cycle closer to the end of VBlank than the previous iteration. ; Eventually, the VBlank flag is no longer set when the LDA instruction reads from $2002. (when A>=4) ; The bits are rearranged so the VBlank flag gets stored in bit 0, and this value is written to $50,X ASL A ; Shift VBlank flag into carry LDA #0 ; clear A ROL A ; Rotate carry into bit 0. STA <$50,X ; store in $50,X INX CPX #$07 BNE TEST_VBlank_End_Loop ; loop until X=7 ; Address $50 should now look exactly like TEST_VBlank_Beginning_Expected_Results LDX #0 TEST_VBlank_End_Loop2: LDA <$50,X CMP TEST_VBlank_End_Expected_Results,X BNE TEST_Fail9 INX CPX #$07 BNE TEST_VBlank_End_Loop2 ;; END OF TEST ;; LDA #1 RTS ;;;;;;; TEST_VBlank_End_Expected_Results: .byte $01, $01, $01, $01, $00, $00, $00, $00 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; FAIL_NMI_Control1: JSR DisableNMI JMP TEST_Fail TEST_NMI_Control: ; Special thanks to blargg. I'm pretty much just doing what they did here. LDA #$E8 ; INX opcode STA $700 LDA #$40 ; RTI opcode STA $701 ;;; Test 1 [NMI Control]: The NMI should not occur when disabled. ;;; ; The NMI *should* already be disabled (and it being enabled during the "WaitForVBlank" that happened before the jump here would be problematic...) ; but let's test this anyway. LDX #0 JSR Clockslide_29780 ; Wait 1 frame. CPX #0 BNE FAIL_NMI_Control1 ; If the NMI occurs, it will run INX, ... RTI. That would increment X to 1, thus failing the test. INC 0, X<8) the Sprite Zero Hit occurs. ;;; JSR WaitForVBlank JSR InitializeSpriteZero ; YPos, CHR, Att, XPos .byte $00, $FC, $00, $01 ; Xpos = 1 STX $4014 ; OAM DMA (we want to keep OAM refreshed for these tests) JSR Clockslide_3000 ; Wait long enough for VBlank to be over, and a few scanlines to render. (Sprite Zero hit should occur, for some sprite zero is visible.) 
LDA $2002 ; Bit 6 should be set, since the sprite zero hit should have occurred. AND #$40 BEQ FAIL_Sprite0Hit_Behavior INC =239. ;;; JSR WaitForVBlank JSR InitializeSpriteZero ; YPos, CHR, Att, XPos .byte 239, $FC, $00, $08 JSR WaitForVBlank STX $4014 ; OAM DMA (we want to keep OAM refreshed for these tests) JSR Clockslide_29780 ; Wait an entire frame, since this sprite is at the bottom of the screen. (Sprite Zero hit should NOT occur.) JSR Clockslide_500 ; wait a few more scanlines just to be sure. LDA $2002 ; Bit 6 should NOT be set, since the sprite zero hit should not have occurred. AND #$40 BNE FAIL_Sprite0Hit_Behavior2 INC = 1 && PPU cycle <= 64: clear Secondary-OAM. ; PPU cycle >= 65 && PPU cycle <= 256: Sprite evaluation ; PPU cycle >= 257 && PPU cycle <= 320: Shift register initialization. (Every one of these cycles also clears PPUOAMAddress to 0.) ; ; Keep in mind, on scanline n, we're processing the OAM data in preparation for scanline n+1. ; (So the sprites drawn on, for example, scanline 6 were evaluated during the rendering of scanline 5.) ; ; Let's take a deep look into how Sprite Evaluation works on any given scanline. (PPU cycle >= 65 && PPU cycle <= 256) ; for odd ppu cycles: read index "PPUOAMAddress" of OAM. Let's call the value read "S" ; - "PPUOAMAddress" is the value set by writing to $2003. ; - for our example here, assume PPUOAMAddress is zero, reset during the shift register initialization from the previous scanline. ; for even cycles: Evaluate "S" and determine if this sprite should be drawn on the next scanline. ; - Depending on how the evaluation goes, modify PPUOAMAddress. Typically increment by 1 or 4. (actually "it's complicated", but that's for a different test.) ; Let's focus on the even cycles. ; First of all, if Secondary-OAM is full, the behavior is different. Let's focus on when Secondary-OAM is not full. ; STEP 1: Check that the Y position of this object is in range of this scanline. 
; - The value checked here is "S", which was read in the previous PPU cycle. In this example, "S" should be index 0 of OAM, since PPUOAMAddress was cleared in the previous scanline. ; - If the current scanline number-"S" is positive, and less than 8 (or 16 if the sprites are using the 8 by 16 mode) then this object is in range for this scanline. ; - That previous sentence was just a verbose way of calculating "yes, this object should be rendered on the next scanline". ; - Now, if Secondary-OAM is not full (in this example it is still empty, so yeah- it's not full) we know this object will be in the next scanline, so add it to Secondary-OAM. ; - In addition to being added to Secondary-OAM, if this is PPU cycle 66 of a scanline, it is assumed that we are processing sprite zero, so raise a flag indicating sprite zero exists on the next scanline. ; STEP 2 to 4: Read "S" as the CHR data, attributes, and X position respectively. (each of these steps happen 2 ppu cycles after the previous step, since we need to read "S" from OAM again) ; - There's actually some wild stuff going on with the X position, but that's for a different test. ; ; So to recap, read the Y position, and on the following cycle, see if it's in range. If it is, and this is PPU cycle 66 of a given scanline, raise a flag indicating sprite zero exists on the next scanline. ; - this "flag" essentially says, Secondary-OAM index 0 is "Sprite Zero", as in, a Sprite Zero hit will occur if a "solid pixel" of "sprite zero" overlaps a "solid pixel" of the background. ; - which is, again, a really verbose way of saying "run a check for a sprite zero hit next scanline" if the value of "S" on ppu cycle 66 was in range for this scanline. ; ; Duh. If you've implemented sprite zero hits, you should be following along. ; Perhaps that "flag" isn't the exact way your emulator checks if a sprite is "sprite zero", but it will make sense in a moment why I'm phrasing it this way. 
	;
	; What happens if you write to $2003 after the "Shift register initialization" and before "Sprite evaluation"?
	; Well, PPUOAMAddress won't be $00 when sprite evaluation begins.
	; So the first sprite processed on cycle 66 won't necessarily be index zero of OAM.
	; But if "S" is in range of the scanline, and it's cycle 66, then the next scanline will consider Secondary-OAM index 0 as "sprite zero", even if it isn't OAM index 0.
	;
	; In other words, it *is* possible for an object that isn't OAM index 0 to trigger a sprite 0 hit!
	; On a completely unrelated topic, writing to $2003 is all sorts of jank, and I need my test here to prevent that "jankiness" from ruining the results.
	; This behavior appears to only happen on CPU/PPU clock alignment 3:
	; writing to $2003 can copy 8 bytes of OAM from $20 to $27, and paste these values at:
	; The old PPUOAMAddress & $F8
	; The new PPUOAMAddress & $F8
	; So we also want the 8 values starting at $20 to match the 8 values we want at the new PPUOAMAddress
	; (OAM address $20 is sprite 8, which is why sprite 8 is initialized below with the same bytes as sprite 32.)
	; Since this test is a doozy, I will comment every line and explain why I'm doing this.
	JSR ClearPage2 ; Let's clear page 2. I'm using page 2 for the OAM DMA, so OAM will be a copy of $200 - $2FF
	JSR WaitForVBlank ; Wait for VBlank. I'm going to disable rendering next, and I'd prefer if I waited for VBlank to do that.
	JSR DisableRendering ; Rendering is now disabled. Rendering needs to be disabled for the upcoming VblSync_Plus_A subroutine to work properly.
	LDX #32 ; Let's initialize Sprite 32 at screen coordinates ($08, $00)
	JSR InitializeSpriteX ; This subroutine reads the following 4 bytes, and adjusts the return address accordingly, so the following 4 bytes are not executed.
	.byte $00, $FC, $00, $08 ; Y Position, Pattern Table Index, Attributes, X position
	LDX #8 ; Let's also initialize Sprite 8 with the same values, so the $2003 corruption doesn't break anything.
	JSR InitializeSpriteX ; This subroutine reads the following 4 bytes, and adjusts the return address accordingly, so the following 4 bytes are not executed.
	.byte $00, $FC, $00, $08 ; Y Position, Pattern Table Index, Attributes, X position
	LDA #0 ; A=0, since this next subroutine syncs to PPU cycle A of VBlank, and I want to sync to cycle 0.
	JSR VblSync_Plus_A ; Sync the next CPU cycle to PPU cycle 0 of VBlank. (cycle 1 of scanline 241)
	; The CPU is now at PPU cycle 0 of VBlank.
	; Let's calculate how many CPU cycles remain until scanline 0 is being rendered.
	; We're on dot 1 of scanline 241. The final dot before scanline 0 is dot 341 of scanline 261
	; There are 341 PPU cycles per scanline
	; (341 * 21)-1 = 7160 PPU cycles until dot 0 of scanline 0.
	; 3 PPU cycles per 1 CPU cycle. 7160/3 = 2386.66 CPU cycles.
	; So let's count CPU cycles. We have 2386 cycles until dot 0, which is when we want to write to $2003 to update PPUOAMAddress
	LDA #02 ; (+2 CPU cycles) A = 2, so the OAM DMA will use page 2
	STA $4014 ; (+518 CPU cycles) Run the OAM DMA with page 2.
	LDA #32*4 ; (+2 CPU cycles) Load A with 32*4 (128, or $80) which is the OAM address for the object we initialized.
	JSR EnableRendering ; (+30 CPU cycles) Enable rendering of both the background and sprites, so the sprite zero hit can occur.
	; The four instructions above cost 2 + 518 + 2 + 30 = 552 CPU cycles, so 2386 - 552 = 1834 CPU cycles remain before cycle 0 of scanline 0.
	JSR Clockslide_1830 ; (+1830 CPU cycles, going by the routine's name) This just stalls, leaving about 4 CPU cycles before cycle 0 of scanline 0.
	STA $2003 ; (4 CPU cycles; the write is the final cycle, so it lands right around cycle 0 of scanline 0.) Store A ($80) at PPUOAMAddress. (and probably copy 8 instances of $FF from OAM[$00] to OAM[$20], which won't break anything)
	; Now, the sprite evaluation will occur with sprite 32 (OAM address $80) getting processed first.
	; Since this object is the first one processed, PPU cycle 66 will check if it is in range of the current scanline.
; and if it is (it is), it will be treated as sprite zero for the purposes of triggering a sprite zero hit, despite being sprite 8. JSR Clockslide_500 ; Wait a few scanline for this entire sprite to be drawn LDA $2002 ; Read PPUSTATUS AND #$40 ; mask away every bit except the Sprite Zero Hit flag. BEQ FAIL_ArbitrarySpriteZero; If bit 6 was zero, the sprite zero hit did not occur, thus failing the test. JSR Clockslide_29780 ; Let's wait an entire frame and check again to weed out potential false positives. LDA $2002 ; Read PPUSTATUS AND #$40 ; mask away every bit except the Sprite Zero Hit flag. BNE FAIL_ArbitrarySpriteZero; If bit 6 was non-zero, the sprite zero hit did occur, thus failing the test. INC > 2) + 20 TYA PHA TXA CLC ADC #$03 LSR A LSR A CLC ADC #$20 STA $2006 ; Low = VRegisterByXIndexLowLUT[X&3] TXA AND #$03 TAY LDA VRegisterByXIndexLowLUT,Y STA $2006 PLA TAY RTS ;;;;;;; ReadPPUADDRFromWord: ; Takes the two bytes after the JSR instruction and stores them in $2006. Then reads $2007 twice. STY <$FE JSR CopyReturnAddressToByte0 LDA $2002 LDY #0 LDA [$0000],Y STA $2006 INY LDA [$0000],Y STA $2006 INY JSR FixRTS LDY <$FE LDA $2007 LDA $2007 RTS ;;;;;;; SetPPUADDRFromWord: ; pretty much the same as ReadPPUADDRFromWord, but it doesn't run LDA $2007 twice at the end. STA <$FF STY <$FE JSR CopyReturnAddressToByte0 LDA $2002 LDY #0 LDA [$0000],Y STA $2006 INY LDA [$0000],Y STA $2006 INY JSR FixRTS LDY <$FE LDA <$FF RTS ;;;;;;; WriteToPPUADDRWithByte: ; Sets up v then writes n to it, where n is the third bytes after the JSR STA <$FF STY <$FE JSR CopyReturnAddressToByte0 LDA $2002 LDY #0 WriteToPPUADDRWithByteLoop: LDA [$0000],Y CMP #$FF BEQ WriteToPPUADDRWithByteExit STA $2006 INY LDA [$0000],Y STA $2006 INY LDA [$0000],Y STA $2007 INY JMP WriteToPPUADDRWithByteLoop WriteToPPUADDRWithByteExit: INY JSR FixRTS LDY <$FE LDA <$FF RTS ;;;;;;; DoubleLDA2007: ; There are a few tests that need to read the contents of a PPU address. 
; (Body of DoubleLDA2007: two back-to-back reads of $2007. A returns the value of the second read.)
	LDA $2007 ; and instead of actually writing out LDA $2007 twice (6 bytes)
	LDA $2007 ; you can just jump here instead. (3 bytes)
	RTS
;;;;;;;

SetPPUReadBufferToA: ; Sets the value of the PPU Read buffer to A.
	; How: write A to PPU address $2C00, point the PPU address back at $2C00, then read $2007 once.
	; A is preserved (pushed on entry, pulled on exit). The byte at PPU $2C00 is overwritten with A.
	; The STA below relies on SetPPUADDRFromWord returning with A unchanged.
	PHA
	JSR SetPPUADDRFromWord
	.byte $2C, $00
	STA $2007 ; PPU[$2C00] = A
	JSR SetPPUADDRFromWord
	.byte $2C, $00
	LDA $2007 ; the value read here is discarded; the PLA below overwrites A.
	PLA
	RTS
;;;;;;;

PrepNMI_TimingTests: ; This is re-used in a handful of NMI timing tests.
	; Writes the two-instruction routine "INY / RTI" to $700-$701, disables rendering, and returns with X = 0.
	; NOTE(review): $700 is presumably where the NMI vector lands in RAM - confirm against the vector table.
	LDA #$C8 ; INY opcode
	STA $700
	LDA #$40 ; RTI opcode
	STA $701
	JSR DisableRendering
	LDX #0
	RTS
;;;;;;;

VRegisterByXIndexLowLUT: ; a look up table used in GetVRegisterByXIndexForMenu
	.byte $E1, $21, $61, $A1 ; indexed by X & 3

AttributeNybbles: ; Attribute nybbles used in UpdateTESTAttributes
	.byte $F0, $0F

AttributeNybblesInverse: ; Attribute nybbles used in UpdateTESTAttributes
	.byte $0F, $F0

AttributePaletteNybbles: ; Attribute nybbles used in UpdateTESTAttributes
	.byte $00, $55, $AA, $FF

TestPassFailBlend: ; These are used in DrawTEST. index 0 of each of these spells "TEST". index 1 spells "PASS" and so on.
	; Each row holds one letter position; reading down a column gives TEST, PASS, FAIL, "....", DRAW.
	.byte "TPF.D"
	.byte "EAA.R"
	.byte "SSI.A"
	.byte "TSL.W"

AsciiToCHR: ; This table converts the ASCII values stored in the ROM to the indexes into the pattern table I made.
	; One entry per ASCII code, 16 codes per row. The table stops after code $7A ('z'), so it has 123 entries.
	; Lowercase letters map to the same tiles as uppercase. $24 is used for every character without its own tile.
	.byte $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24 ; $00-$0F
	.byte $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24, $24 ; $10-$1F
	.byte $24, $26, $24, $24, $35, $24, $24, $24, $24, $24, $32, $30, $29, $31, $25, $33 ; $20-$2F (space and punctuation)
	.byte $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $28, $24, $24, $34, $24, $27 ; $30-$3F (digits, then : ; < = > ?)
	.byte $24, $0A, $0B, $0C, $0D, $0E, $0F, $10, $11, $12, $13, $14, $15, $16, $17, $18 ; $40-$4F (@, A-O)
	.byte $19, $1A, $1B, $1C, $1D, $1E, $1F, $20, $21, $22, $23, $24, $24, $24, $24, $24 ; $50-$5F (P-Z, then [ \ ] ^ _)
	.byte $24, $0A, $0B, $0C, $0D, $0E, $0F, $10, $11, $12, $13, $14, $15, $16, $17, $18 ; $60-$6F (`, a-o)
	.byte $19, $1A, $1B, $1C, $1D, $1E, $1F, $20, $21, $22, $23;, $24, $24, $24, $24, $24 ; $70-$7A (p-z); the rest of the row is commented out.

NMI_Routine: ; This is the NMI routine for the main menu.
; (Body of NMI_Routine.)
	JSR ReadController1
	JSR MaskDpadConflicts
	; NOTE(review): the text of this file is damaged on the next line. Everything between `LDA` and `106`
	; is missing (the operand of the LDA, the rest of NMI_Routine, and the label and first instructions
	; of the DMA sync routine that the two lines after it belong to).
	; The surviving tokens are kept verbatim; restore this span from a clean copy before assembling.
	LDA 106
	JSR Clockslide_50 ; 106 -> 56
	RTS ; 56 -> 50 cycles after this RTS, a DMA will occur.

DMASync_50MinusACyclesRemaining: ; Sync the CPU and the DMA, such that the DMA runs exactly 50-A CPU cycles after the RTS instruction ends.
	; The trailing "x -> y" comments track how many CPU cycles remain until the DMA before and after each line.
	JSR DMASync ; the DMA is in 400 cycles;
	JSR Clockslide_100 ; 406 -> 306
	JSR Clockslide_100 ; 306 -> 206
	JSR Clockslide_50 ; 206 -> 156
	JSR Clockslide_40 ; 156 -> 116
	JSR Clockslide_21 ; 116 -> 95
	; NOTE(review): the text of this file is damaged on the next line. Whatever stood between `LDA` and `92`
	; (presumably the instructions that take the countdown from 95 to 92) is missing.
	; The surviving tokens are kept verbatim; restore this span from a clean copy before assembling.
	LDA 92
	JSR Clockslide37_Plus_A ; 92 -> (56-A)
	RTS ; (56-A) -> (50-A) cycles after this RTS, a DMA will occur.
;;;;;;;

SyncTo1000CyclesUntilNMI:
	; Prepares RAM for a cycle-counting measurement and returns shortly before the next NMI:
	;  - $500-$6FE are filled with NOP ($EA) and $6FF holds an RTS ($60).
	;  - $700-$702 hold "JMP ConvertReturnAddressIntoCPUCycles".
	;    NOTE(review): $700 is presumably where the NMI vector lands in RAM - confirm against the vector table.
	;  - Rendering is disabled, the CPU is synced to the start of VBlank, NMIs are enabled, and the routine
	;    then stalls for most of a frame. The running totals in the trailing comments are cycles since that sync.
	; A is preserved (PHA/PLA). X is clobbered (it is 0 after the fill loop, and the helpers may change it).
	PHA
	LDA #$EA ; NOP opcode
	LDX #0
SyncTo1000CyclesUntilNMILoop:
	STA $500,X
	STA $600,X
	INX
	BNE SyncTo1000CyclesUntilNMILoop ; loop until X wraps back to 0 (256 iterations).
	LDA #$4C ; JMP (absolute) opcode
	STA $700
	LDA #LOW(ConvertReturnAddressIntoCPUCycles)
	STA $701
	LDA #HIGH(ConvertReturnAddressIntoCPUCycles)
	STA $702
	LDA #$60
	STA $6FF ; RTS at the end of page 6.
	JSR DisableRendering
	LDA #0
	JSR VblSync_Plus_A ; We are now on dot 0 of VBlank
	JSR Clockslide_6000 ; 6000
	JSR EnableNMI ; 6031 (wait for VBlank to end first)
	JSR Clockslide_20000; 26031
	JSR Clockslide_2000 ; 28031
	JSR Clockslide_700 ; 28731
	JSR Clockslide_41 ; 28772
	PLA
	RTS ;+6
;;;;;;;

ConvertReturnAddressIntoCPUCycles:
	; Entered through the "JMP" that SyncTo1000CyclesUntilNMI writes to $700, i.e. as an interrupt handler.
	; It reads the interrupted program counter from the stack and turns it into a cycle count,
	; which is built up in $60 (high byte) and $61 (low byte).
	TSX
	LDA $0103,X ; High byte of the return address
	; The latest the NMI can happen is from address $06F2.
	; Subtract 5 from the high byte.
	SEC
	SBC #$5
	STA <$60 ; A very rare use of address $60, which is reserved for non-engine test stuff.
	LDA $0102,X ; Low byte of the return address.
	CLC
	ADC #$2 ; add 2 (JSR takes 6 cycles, which would be equivalent to 3 NOPs. Then subtract 1 from 3, ( =2 ) since the NMI happens after the instruction ends.)
	STA <$61 ; Keep in mind, these LDA's only work if the stack pointer isn't 00, but that's not really a concern here.
	LDA <$60 ;
	ADC #0 ; Add the carry.
	; A now holds the high byte including the carry; it is not written back to $60 until after the shift that follows.
	; Now, the total number of CPU cycles/2 is stored at $60.
	; Issue: Since we're dividing by 2, we don't know if we're on an even or an odd cycle.
	;
	; start by multiplying by 2.
; (ConvertReturnAddressIntoCPUCycles continues.) On entry A holds the high byte of the adjusted NOP count and $61 its low byte.
; The sled is made of NOPs (1 byte, 2 cycles each), so the 16-bit value is shifted left once.
; From here on the value lives in $60 (HIGH byte) and $61 (LOW byte).
	ASL A
	STA <$60
	ASL <$61 ; bit 7 of the low byte goes into the carry...
	LDA <$60
	ADC #0 ; ...and is added to the high byte.
	STA <$60
	; Okay, now we have the number of CPU cycles, &=$FFFE
	; If this was an even cycle, add 0. If this was an odd cycle, add 1.
	; We're pretty much just going to wait for the next NMI to determine this.
	; LDX #0, then INX: CRAICPUC_NMI adds X to the count, so if the NMI lands after the INX we add 1. If it lands before the INX, we add 0.
	LDA #LOW(CRAICPUC_NMI) ; re-point the JMP at $700 to the second-stage NMI routine.
	STA $701
	LDA #HIGH(CRAICPUC_NMI)
	STA $702
	JSR Clockslide_29700
	NOP ; +2
	NOP ; +2
	NOP
	NOP
	; Now we clockslide until there's either 2 or 3 cycles until the NMI.
	LDX #0 ; The NMI either happens here...
	INX ; or here.
	JSR DisableNMI
	; At this point, the 16-bit number stored at $60 (high byte) / $61 (low byte) is exactly 1000-TotalCyclesOfTheTest
	; So all we need to do to calculate TotalCyclesOfTheTest is subtract the 16-bit word at $60 from 1000
	SEC
	LDA #LOW(1000)
	SBC <$61
	STA <$61
	LDA #HIGH(1000)
	SBC <$60
	STA <$60
	; Subtract the overhead:
	; The overhead is the 16-bit value at $62 (high byte) / $63 (low byte); presumably the calling test sets it up beforehand - confirm at the call sites.
	SEC
	LDA <$61
	SBC <$63
	STA <$61
	LDA <$60
	SBC <$62
	STA <$60
	RTI ; returns to the test code.

CRAICPUC_NMI: ; Convert Return Address Into CPU Cycles NMI Routine
	; Adds X (0 or 1, see the LDX #0 / INX above) to the 16-bit count at $60 (high) / $61 (low).
	; Clobbers A and the flags of the interrupted code are restored by RTI.
	;
	; The high byte used to be handled with "LDA <$60 / ADC <$60", which added $60 to itself and
	; never stored the result, so a carry out of the low byte was silently dropped.
	; It is now propagated with BCC / INC, which assembles to the same number of bytes (4) so that
	; nothing after this routine moves. On the no-carry path this is 3 cycles shorter than the old pair;
	; nothing visible in this routine is timed against the length of this handler (the NMI is disabled
	; right after it returns), but re-check if a caller turns out to depend on it.
	TXA
	CLC
	ADC <$61
	STA <$61
	BCC CRAICPUC_NMI_NoCarry ; no carry out of the low byte: the high byte is already correct.
	INC <$60 ; carry out of the low byte: bump the high byte.
CRAICPUC_NMI_NoCarry:
	RTI
;;;;;;;

; A giant list of ClockSlides!
; "What's a clockslide?"
; It's just a subroutine that wastes a precise amount of CPU cycles.
; If you want to waste exactly n cycles, run JSR Clockslide_n
; (Clockslide_14 through Clockslide_50 are defined, and most larger clockslides are a combination of JSRs to those clockslides)

Clockslide_100Minus12: ; This is very handy for the following clockslides I want to make. 100, 200, etc.
	; The running totals below include the 6 cycles of the JSR that got us here.
	JSR Clockslide_26 ;=32
	JSR Clockslide_50 ;=82
	RTS ;=100-12. Remember, JSR and RTS add 12 cycles, so to make clockslide 100, I just need to JSR somewhere with JSR Clockslide_100Minus12
;;;;;;;

; The labels below form three fall-through chains. Entering at a label runs every JSR from there down to the
; shared RTS at the bottom, so each label one step higher up costs exactly one more JSR's worth of cycles.
Clockslide_50000:
	JSR Clockslide_10000
Clockslide_40000:
	JSR Clockslide_10000
Clockslide_30000:
	JSR Clockslide_10000
Clockslide_20000:
	JSR Clockslide_10000
Clockslide_10000:
	JSR Clockslide_1000
Clockslide_9000:
	JSR Clockslide_1000
Clockslide_8000:
	JSR Clockslide_1000
Clockslide_7000:
	JSR Clockslide_1000
Clockslide_6000:
	JSR Clockslide_1000
Clockslide_5000:
	JSR Clockslide_1000
Clockslide_4000:
	JSR Clockslide_1000
Clockslide_3000:
	JSR Clockslide_1000
Clockslide_2000:
	JSR Clockslide_1000
Clockslide_1000:
	JSR Clockslide_100
Clockslide_900:
	JSR Clockslide_100
Clockslide_800:
	JSR Clockslide_100
Clockslide_700:
	JSR Clockslide_100
Clockslide_600:
	JSR Clockslide_100
Clockslide_500:
	JSR Clockslide_100
Clockslide_400:
	JSR Clockslide_100
Clockslide_300:
	JSR Clockslide_100
Clockslide_200:
	JSR Clockslide_100
Clockslide_100:
	JSR Clockslide_100Minus12 ; Since JSR and RTS take 12 cycles, let's stall for exactly 100-12 cycles.
	RTS
;;;;;;;

;A frame has about 29780 cycles, so let's make a few around that number.
Clockslide_29700:
	; All of the composite clockslides below follow the same pattern: "JSR Clockslide_100Minus12" accounts for
	; 100 cycles once this routine's own JSR (6) and RTS (6) are included, and every further JSR Clockslide_n
	; adds exactly n. The number after each line is the running total.
	JSR Clockslide_100Minus12
	JSR Clockslide_600 ;700
	JSR Clockslide_9000 ;9700
	JSR Clockslide_20000 ;29700
	RTS
;;;;;;;

Clockslide_29750:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_600 ;750
	JSR Clockslide_9000 ;9750
	JSR Clockslide_20000 ;29750
	RTS
;;;;;;;

Clockslide_29780:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_30 ;180
	JSR Clockslide_600 ;780
	JSR Clockslide_9000 ;9780
	JSR Clockslide_20000 ;29780
	RTS
;;;;;;;

Clockslide_29776:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_26 ;176
	JSR Clockslide_600 ;776
	JSR Clockslide_9000 ;9776
	JSR Clockslide_20000 ;29776
	RTS
;;;;;;;

Clockslide_2269:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_19 ;169
	JSR Clockslide_100 ;269
	JSR Clockslide_2000 ;2269
	RTS
;;;;;;;

Clockslide_2252:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	NOP ;152
	JSR Clockslide_100 ;252
	JSR Clockslide_2000 ;2252
	RTS
;;;;;;;

Clockslide_2032:
	JSR Clockslide_100Minus12
	JSR Clockslide_32 ;132
	JSR Clockslide_900 ;1032
	JSR Clockslide_1000 ;2032
	RTS
;;;;;;;

Clockslide_1830:
	JSR Clockslide_100Minus12
	JSR Clockslide_30 ;130
	JSR Clockslide_700 ;830
	JSR Clockslide_1000 ;1830
	RTS
;;;;;;;

Clockslide_1816: ;=6
	JSR Clockslide_100Minus12
	JSR Clockslide_16 ;116
	JSR Clockslide_700 ;816
	JSR Clockslide_1000 ;1816
	RTS
;;;;;;;

Clockslide_14900:
	JSR Clockslide_100Minus12
	JSR Clockslide_800 ;900
	JSR Clockslide_4000 ;4900
	JSR Clockslide_10000 ;14900
	RTS
;;;;;;;

Clockslide_29820:
	JSR Clockslide_100Minus12
	JSR Clockslide_20 ;120
	JSR Clockslide_700 ;820
	JSR Clockslide_9000 ;9820
	JSR Clockslide_20000 ;29820
	RTS
;;;;;;;

Clockslide_44730:
	JSR Clockslide_100Minus12
	JSR Clockslide_30 ;130
	JSR Clockslide_600 ;730
	JSR Clockslide_4000 ;4730
	JSR Clockslide_40000 ;44730
	RTS
;;;;;;;

Clockslide_37270:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_20 ;170
	JSR Clockslide_100 ;270
	JSR Clockslide_7000 ;7270
	JSR Clockslide_30000 ;37270
	RTS
;;;;;;;

Clockslide_52180:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_30 ;180
	JSR Clockslide_2000 ;2180
	JSR Clockslide_50000 ;52180
	RTS
;;;;;;;

Clockslide_4320:
	JSR Clockslide_100Minus12
	JSR Clockslide_20 ;120
	JSR Clockslide_200 ;320
	JSR Clockslide_4000 ;4320
	RTS
;;;;;;;

Clockslide_432:
	JSR Clockslide_100Minus12
	JSR Clockslide_32 ;132
	JSR Clockslide_300 ;432
	RTS
;;;;;;;

Clockslide_8640:
	JSR Clockslide_100Minus12
	JSR Clockslide_40 ;140
	JSR Clockslide_500 ;640
	JSR Clockslide_8000 ;8640
	RTS
;;;;;;;

Clockslide_12960:
	JSR Clockslide_100Minus12
	JSR Clockslide_40 ;140
	JSR Clockslide_20 ;160
	JSR Clockslide_800 ;960
	JSR Clockslide_2000 ;2960
	JSR Clockslide_10000 ;12960
	RTS
;;;;;;;

Clockslide_26352:
	JSR Clockslide_100Minus12
	JSR Clockslide_40 ;140
	JSR Clockslide_12 ;152
	JSR Clockslide_200 ;352
	JSR Clockslide_6000 ;6352
	JSR Clockslide_20000 ;26352
	RTS
;;;;;;;

Clockslide_1728:
	JSR Clockslide_100Minus12
	JSR Clockslide_28 ;128
	JSR Clockslide_600 ;728
	JSR Clockslide_1000 ;1728
	RTS
;;;;;;;

Clockslide_29766:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_16 ;166
	JSR Clockslide_600 ;766
	JSR Clockslide_9000 ;9766
	JSR Clockslide_20000 ;29766
	RTS
;;;;;;;

Clockslide_29765:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_15 ;165
	JSR Clockslide_600 ;765
	JSR Clockslide_9000 ;9765
	JSR Clockslide_20000 ;29765
	RTS
;;;;;;;

Clockslide_3395:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_45 ;195
	JSR Clockslide_200 ;395
	JSR Clockslide_3000 ;3395
	RTS
;;;;;;;

Clockslide_3380:
	JSR Clockslide_100Minus12
	JSR Clockslide_50 ;150
	JSR Clockslide_30 ;180
	JSR Clockslide_200 ;380
	JSR Clockslide_3000 ;3380
	RTS
;;;;;;;

; Variable-length clockslide: wastes 37 + A cycles in total, counting the JSR that calls it and the final RTS.
; It builds the pointer $FF00 + (36 - A) in $00/$01 and jumps through it into the byte-per-cycle
; Clockslide table at $FF00, where Clockslide_n sits at $FF00 + (50 - n). The jump therefore lands on Clockslide_(14 + A).
; Clobbers A, $00, $01 and the flags. A must be small enough for 14 + A to be a defined entry point.
; NOTE(review): this assumes the table keeps one label per byte all the way down to Clockslide_14 - confirm at the end of the table.
Clockslide37_Plus_A:;+6
	STA <$00 ; +3
	LDA #$FF ; +2
	STA <$01 ; +3
	LDA #36 ; +2
	SEC ; +2
	SBC <$00 ; +3
	STA <$00 ; +3
	JMP [$0000] ; +5 for the jump, then (2 + A) cycles inside the table, then +6 for the table's RTS
;;;;;;;;;;;;;;;;;

; Variable-length clockslide in the other direction: builds the pointer $FF00 + A in $00/$01 and jumps
; through it, landing on Clockslide_(50 - A). Clobbers A, $00, $01 and the flags.
; NOTE(review): by my count this takes 63 - A cycles in total, not 64 - A:
;   6 (JSR) + 3 + 2 + 3 + 5 (JMP) = 19, plus the (44 - A) cycles Clockslide_(50 - A) takes from its entry to the end of its RTS.
;   The "+50 - A" on the JMP line counts a JSR (6) where a JMP (5) is executed.
;   The code is left untouched because callers may already be calibrated to the real timing - confirm before renaming or padding.
Clockslide64_Minus_A:;+6
	STA <$00 ; +3
	LDA #$FF ; +2
	STA <$01 ; +3
	JMP [$0000] ; +50 - A
;;;;;;;;;;;;;;;;;

VblSync_Plus_A_End: ; Moved here for space. This is the end of the VblSync_Plus_A subroutine.
; (Body of VblSync_Plus_A_End: stall for 29780 + 29750 cycles plus three NOPs, then touch PPUSTATUS and return.)
	JSR Clockslide_29780
	JSR Clockslide_29750
	NOP
	NOP
	NOP
	BIT $2002 ; reads PPUSTATUS without changing A.
	RTS
;;;;;;;

	.org $FDF3
TEST_IFlagLatency_PageBoundaryTest:
	;;; Test B [Interrupt Flag Latency]: Do branches poll for interrupts before cycle 4? (They should) ;;;
	JSR TEST_IFlagLatency_StartTest_10ExtraCycles ; clear address $50, and sync with DMA. X=0. We have 12 cycles until the DMA instead of the usual 2 these tests have used.
	LDA #$5A ; +2 (10 cycles until DMA)
	STA <$50 ; +3 (7 cycles until DMA)
	; NOTE(review): the text of this file is damaged on the next line. Everything between `LDA` and `406`
	; is missing (the operand of the LDA, the rest of this test, and the routine that the
	; "406 cycles to go" comment and the sync_dmc_fail label below belong to).
	; The surviving tokens are kept verbatim; restore this span from a clean copy before assembling.
	LDA 406 ; the next DMA is at (432) cycles, so we have 406 cycles to go.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
sync_dmc_fail:
	RTS ; The DMA timing will be way off on this test, but it was unable to sync anyway, so... Better than infinite looping?

	.org $FF00
Clockslide:
	; JSR takes 6 cycles.
	; The following bytes are labeled with the total cycles until the RTS instruction ends.
	; Clockslide has a minimum of 12 cycles.
	; EXAMPLE USE: Let's count CPU cycles!
	; LDA #00 ; +2 cycles
	; STA $0100 ; +4 cycles
	; JSR Clockslide_45 ; +45 cycles
	; LDA $2002 ; +4 cycles
	;
	; The table starts exactly at $FF00 with one label per byte, so Clockslide_n is at $FF00 + (50 - n).
	; Clockslide37_Plus_A and Clockslide64_Minus_A rely on that when they build a pointer with $FF as its high byte.
Clockslide_50: .byte $C9 ; If you start executing here, there are 38 cycles between here and the RTS instruction. (+6 for the JSR, +6 for the RTS = 50)
Clockslide_49: .byte $C9 ; If you start executing here, there are 37 cycles between here and the RTS instruction. (+6 for the JSR, +6 for the RTS = 49)
Clockslide_48: .byte $C9 ; If you start executing here, there are 36 cycles between here and the RTS instruction. (+6 for the JSR, +6 for the RTS = 48)
Clockslide_47: .byte $C9 ; ... and so on.
Clockslide_46: .byte $C9 ; In case you're wondering how this works...
Clockslide_45: .byte $C9 ; opcode $C9 is for "CMP Immediate". (Which unfortunately updates the CPU status flags...)
Clockslide_44: .byte $C9 ; CMP Immediate takes 2 cycles, and is also 2 bytes long. (Opcode and Operand)
Clockslide_43: .byte $C9 ; ...
Clockslide_42: .byte $C9 Clockslide_41: .byte $C9 Clockslide_40: .byte $C9 Clockslide_39: .byte $C9 Clockslide_38: .byte $C9 Clockslide_37: .byte $C9 Clockslide_36: .byte $C9 Clockslide_35: .byte $C9 Clockslide_34: .byte $C9 Clockslide_33: .byte $C9 Clockslide_32: .byte $C9 Clockslide_31: .byte $C9 Clockslide_30: .byte $C9 Clockslide_29: .byte $C9 Clockslide_28: .byte $C9 Clockslide_27: .byte $C9 Clockslide_26: .byte $C9 Clockslide_25: .byte $C9 Clockslide_24: .byte $C9 Clockslide_23: .byte $C9 Clockslide_22: .byte $C9 Clockslide_21: .byte $C9 Clockslide_20: .byte $C9 Clockslide_19: .byte $C9 Clockslide_18: .byte $C9 Clockslide_17: .byte $C9 Clockslide_16: .byte $C9 ; If this is executed, the $C5 is the operand. +2 cycles. Clockslide_15: .byte $C5 ; CMP