From a290c96899f96e291a516d532d34c46726d15d96 Mon Sep 17 00:00:00 2001 From: Near <77224854+near-san@users.noreply.github.com> Date: Mon, 10 May 2021 13:31:00 +0000 Subject: [PATCH] Update to ares v120r02 release. This fixes a major issue with the 68K ADD,CMP,SUB instructions not clipping, and improves the VDP scanline renderer timing somewhat. This fixes Alien Soldier, and I also moved the DMA run below the FIFO/prefetch to fix Comix Zone's graphics. It regresses several Titan Overdrive 2 screens, but oh well. I'm trying to track down a bug in Popful Mail (US) where the CPU crashes right after the title screen. It seems like the CPU is getting stuck in a loop that keeps calling the same function, so the stack keeps decrementing until it ends up overwriting the currently executing code with a bad instruction. Without an I/O tracer it's really difficult to try and understand what the CPU is stuck waiting on and looping on. I'll have to add an I/O tracer to the Mega CD registers. The CPU fixes sadly didn't improve anything in the 32X space. Metal Head gets in a loop until it overwrites the stack frame return address with 0x1fff1fff that causes a return to an unaligned address that crashes the SH2 master. After Burner Complete doesn't boot at all. Amazing Spider-Man still has screwed up graphics priority ordering for the 32X VDP. Space Harrier still shows a bunch of junk lines at the top and bottom of the screen. The scanline VDP can now run Lemmings 1&2, Alien Soldier, etc. But it still does horrifically badly with Titan Overdrive, especially 2. 
--- ares/ares/ares.hpp | 2 +- ares/component/processor/m68k/algorithms.cpp | 52 ++++--- .../component/processor/m68k/disassembler.cpp | 41 ++++- ares/component/processor/m68k/registers.cpp | 22 ++- ares/md/vdp-performance/dma.cpp | 33 ++-- ares/md/vdp-performance/io.cpp | 142 ++++++++++-------- ares/md/vdp-performance/irq.cpp | 22 +++ ares/md/vdp-performance/main.cpp | 90 +++++++++++ ares/md/vdp-performance/serialization.cpp | 48 +++--- ares/md/vdp-performance/vdp.cpp | 75 ++------- ares/md/vdp-performance/vdp.hpp | 131 +++++++++------- ares/md/vdp/debugger.cpp | 30 +++- ares/md/vdp/dma.cpp | 3 + ares/md/vdp/fifo.cpp | 1 + ares/md/vdp/io.cpp | 2 + ares/md/vdp/irq.cpp | 6 +- ares/md/vdp/main.cpp | 2 +- ares/md/vdp/vdp.hpp | 6 +- .../{lsb-readable.hpp => lsb/readable.hpp} | 10 +- .../{lsb-writable.hpp => lsb/writable.hpp} | 15 +- ares/n64/memory/memory.hpp | 4 +- .../{msb-readable.hpp => msb/readable.hpp} | 4 +- .../{msb-writable.hpp => msb/writable.hpp} | 6 +- ares/ps1/memory/bus.hpp | 12 +- 24 files changed, 457 insertions(+), 302 deletions(-) create mode 100644 ares/md/vdp-performance/irq.cpp create mode 100644 ares/md/vdp-performance/main.cpp rename ares/n64/memory/{lsb-readable.hpp => lsb/readable.hpp} (89%) rename ares/n64/memory/{lsb-writable.hpp => lsb/writable.hpp} (87%) rename ares/n64/memory/{msb-readable.hpp => msb/readable.hpp} (94%) rename ares/n64/memory/{msb-writable.hpp => msb/writable.hpp} (93%) diff --git a/ares/ares/ares.hpp b/ares/ares/ares.hpp index 5d2ef3b752..c4baa92c08 100644 --- a/ares/ares/ares.hpp +++ b/ares/ares/ares.hpp @@ -36,7 +36,7 @@ using namespace nall; namespace ares { static const string Name = "ares"; - static const string Version = "120.1"; + static const string Version = "120.2"; static const string Copyright = "Near"; static const string License = "CC BY-NC-ND 4.0"; static const string LicenseURI = "https://creativecommons.org/licenses/by-nc-nd/4.0/"; diff --git a/ares/component/processor/m68k/algorithms.cpp 
b/ares/component/processor/m68k/algorithms.cpp index ed14254467..82b2c82276 100644 --- a/ares/component/processor/m68k/algorithms.cpp +++ b/ares/component/processor/m68k/algorithms.cpp @@ -1,9 +1,12 @@ template auto M68K::ADD(n32 source, n32 target) -> n32 { - auto result = (n64)source + target; - if(extend) result += r.x; - - r.c = sign(result >> 1) < 0; - r.v = sign(~(target ^ source) & (target ^ result)) < 0; + target = clip(target); + source = clip(source); + u32 result = target + source + (extend ? r.x : 0); + u32 carries = target ^ source ^ result; + u32 overflow = (target ^ result) & (source ^ result); + + r.c = (carries ^ overflow) & msb(); + r.v = overflow & msb(); r.z = clip(result) ? 0 : (extend ? r.z : 1); r.n = sign(result) < 0; r.x = r.c; @@ -12,7 +15,7 @@ template auto M68K::ADD(n32 source, n32 target) -> n32 { } template auto M68K::AND(n32 source, n32 target) -> n32 { - n32 result = target & source; + u32 result = target & source; r.c = 0; r.v = 0; @@ -24,10 +27,10 @@ template auto M68K::AND(n32 source, n32 target) -> n32 { template auto M68K::ASL(n32 result, u32 shift) -> n32 { bool carry = false; - n32 overflow = 0; + u32 overflow = 0; for(auto _ : range(shift)) { carry = result & msb(); - n32 before = result; + u32 before = result; result <<= 1; overflow |= before ^ result; } @@ -43,10 +46,10 @@ template auto M68K::ASL(n32 result, u32 shift) -> n32 { template auto M68K::ASR(n32 result, u32 shift) -> n32 { bool carry = false; - n32 overflow = 0; + u32 overflow = 0; for(auto _ : range(shift)) { carry = result & lsb(); - n32 before = result; + u32 before = result; result = sign(result) >> 1; overflow |= before ^ result; } @@ -61,10 +64,14 @@ template auto M68K::ASR(n32 result, u32 shift) -> n32 { } template auto M68K::CMP(n32 source, n32 target) -> n32 { - auto result = (n64)target - source; - - r.c = sign(result >> 1) < 0; - r.v = sign((target ^ source) & (target ^ result)) < 0; + target = clip(target); + source = clip(source); + u32 result = 
target - source; + u32 carries = target ^ source ^ result; + u32 overflow = (target ^ result) & (source ^ target); + + r.c = (carries ^ overflow) & msb(); + r.v = overflow & msb(); r.z = clip(result) == 0; r.n = sign(result) < 0; @@ -72,7 +79,7 @@ template auto M68K::CMP(n32 source, n32 target) -> n32 { } template auto M68K::EOR(n32 source, n32 target) -> n32 { - n32 result = target ^ source; + u32 result = target ^ source; r.c = 0; r.v = 0; @@ -115,7 +122,7 @@ template auto M68K::LSR(n32 result, u32 shift) -> n32 { } template auto M68K::OR(n32 source, n32 target) -> n32 { - auto result = target | source; + u32 result = target | source; r.c = 0; r.v = 0; @@ -192,11 +199,14 @@ template auto M68K::ROXR(n32 result, u32 shift) -> n32 { } template auto M68K::SUB(n32 source, n32 target) -> n32 { - auto result = (n64)target - source; - if(extend) result -= r.x; - - r.c = sign(result >> 1) < 0; - r.v = sign((target ^ source) & (target ^ result)) < 0; + target = clip(target); + source = clip(source); + u32 result = target - source - (extend ? r.x : 0); + u32 carries = target ^ source ^ result; + u32 overflow = (target ^ result) & (source ^ target); + + r.c = (carries ^ overflow) & msb(); + r.v = overflow & msb(); r.z = clip(result) ? 0 : (extend ? 
r.z : 1); r.n = sign(result) < 0; r.x = r.c; diff --git a/ares/component/processor/m68k/disassembler.cpp b/ares/component/processor/m68k/disassembler.cpp index bbaeb1490d..643f4357ad 100644 --- a/ares/component/processor/m68k/disassembler.cpp +++ b/ares/component/processor/m68k/disassembler.cpp @@ -94,7 +94,7 @@ auto M68K::_condition(n4 condition) -> string { auto M68K::disassembleInstruction(n32 pc) -> string { _pc = pc; - return {hex(_read(_pc), 4L), " ", pad(disassembleTable[_readPC()](), -60)}; //todo: exact maximum length unknown (and sub-optimal) + return {hex(_read(_pc), 4L), " ", pad(disassembleTable[_readPC()](), -49)}; } auto M68K::disassembleContext() -> string { @@ -374,16 +374,32 @@ template auto M68K::disassembleMOVEA(EffectiveAddress from, AddressReg return {"movea ", _effectiveAddress(from), ",", _addressRegister(to)}; } +//longest register list: "d0-d1,d3-d4,d6-d7/a0-a1,a3-a4,a6-a7" template auto M68K::disassembleMOVEM_TO_MEM(EffectiveAddress to) -> string { string op{"movem", _suffix(), " "}; n16 list = _readPC(); string regs; - for(u32 n : range(8)) if(list.bit(0 + n)) regs.append(_dataRegister(DataRegister{n}), ","); + for(u32 lhs = 0; lhs < 8; lhs++) { + if(!list.bit(0 + lhs)) continue; + regs.append(_dataRegister(DataRegister{lhs})); + if(lhs == 7 || !list.bit(1 + lhs)) { regs.append(","); continue; } + for(u32 rhs = lhs; rhs < 8; rhs++) { + if(rhs == 7 || !list.bit(1 + rhs)) { regs.append("-", _dataRegister(DataRegister{rhs}), ","); lhs = rhs; break; } + } + } regs.trimRight(","); if(regs && list >> 8) regs.append("/"); - for(u32 n : range(8)) if(list.bit(8 + n)) regs.append(_addressRegister(AddressRegister{n}), ","); + for(u32 lhs = 0; lhs < 8; lhs++) { + if(!list.bit(8 + lhs)) continue; + regs.append(_addressRegister(AddressRegister{lhs})); + if(lhs == 7 || !list.bit(9 + lhs)) { regs.append(","); continue; } + for(u32 rhs = lhs; rhs < 8; rhs++) { + if(rhs == 7 || !list.bit(9 + rhs)) { regs.append("-", _addressRegister(AddressRegister{rhs}), 
","); lhs = rhs; break; } + } + } regs.trimRight(","); + if(!regs) regs = "-"; return {op, regs, ",", _effectiveAddress(to)}; } @@ -393,11 +409,26 @@ template auto M68K::disassembleMOVEM_TO_REG(EffectiveAddress from) -> n16 list = _readPC(); string regs; - for(u32 n : range(8)) if(list.bit(0 + n)) regs.append(_dataRegister(DataRegister{n}), ","); + for(u32 lhs = 0; lhs < 8; lhs++) { + if(!list.bit(0 + lhs)) continue; + regs.append(_dataRegister(DataRegister{lhs})); + if(lhs == 7 || !list.bit(1 + lhs)) { regs.append(","); continue; } + for(u32 rhs = lhs; rhs < 8; rhs++) { + if(rhs == 7 || !list.bit(1 + rhs)) { regs.append("-", _dataRegister(DataRegister{rhs}), ","); lhs = rhs; break; } + } + } regs.trimRight(","); if(regs && list >> 8) regs.append("/"); - for(u32 n : range(8)) if(list.bit(8 + n)) regs.append(_addressRegister(AddressRegister{n}), ","); + for(u32 lhs = 0; lhs < 8; lhs++) { + if(!list.bit(8 + lhs)) continue; + regs.append(_addressRegister(AddressRegister{lhs})); + if(lhs == 7 || !list.bit(9 + lhs)) { regs.append(","); continue; } + for(u32 rhs = lhs; rhs < 8; rhs++) { + if(rhs == 7 || !list.bit(9 + rhs)) { regs.append("-", _addressRegister(AddressRegister{rhs}), ","); lhs = rhs; break; } + } + } regs.trimRight(","); + if(!regs) regs = "-"; return {op, _effectiveAddress(from), ",", regs}; } diff --git a/ares/component/processor/m68k/registers.cpp b/ares/component/processor/m68k/registers.cpp index ac5e851cc8..63cd05b386 100644 --- a/ares/component/processor/m68k/registers.cpp +++ b/ares/component/processor/m68k/registers.cpp @@ -1,31 +1,41 @@ template auto M68K::read(DataRegister reg) -> n32 { - return clip(r.d[reg.number]); + if constexpr(Size == Byte) return (u8 )r.d[reg.number]; + if constexpr(Size == Word) return (u16)r.d[reg.number]; + if constexpr(Size == Long) return (u32)r.d[reg.number]; + unreachable; } template auto M68K::write(DataRegister reg, n32 data) -> void { - r.d[reg.number] = (r.d[reg.number] & ~mask()) | (data & mask()); + if 
constexpr(Size == Byte) r.d[reg.number] = r.d[reg.number] & ~0xff | data & 0xff; + if constexpr(Size == Word) r.d[reg.number] = r.d[reg.number] & ~0xffff | data & 0xffff; + if constexpr(Size == Long) r.d[reg.number] = data; } // template auto M68K::read(AddressRegister reg) -> n32 { - return sign(r.a[reg.number]); + if constexpr(Size == Byte) return (s8 )r.a[reg.number]; + if constexpr(Size == Word) return (s16)r.a[reg.number]; + if constexpr(Size == Long) return (s32)r.a[reg.number]; + unreachable; } template auto M68K::write(AddressRegister reg, n32 data) -> void { - r.a[reg.number] = sign(data); + if constexpr(Size == Byte) r.a[reg.number] = (s8 )data; + if constexpr(Size == Word) r.a[reg.number] = (s16)data; + if constexpr(Size == Long) r.a[reg.number] = (s32)data; } // -//CCR,SR unused bits cannot be set; always read out as 0 +//CCR/SR unused bits cannot be set; always read out as 0 auto M68K::readCCR() -> n8 { return r.c << 0 | r.v << 1 | r.z << 2 | r.n << 3 | r.x << 4; } auto M68K::readSR() -> n16 { - return readCCR() << 0 | r.i << 8 | r.s << 13 | r.t << 15; + return r.c << 0 | r.v << 1 | r.z << 2 | r.n << 3 | r.x << 4 | r.i << 8 | r.s << 13 | r.t << 15; } auto M68K::writeCCR(n8 ccr) -> void { diff --git a/ares/md/vdp-performance/dma.cpp b/ares/md/vdp-performance/dma.cpp index 9ee91e2198..da84dfd7b4 100644 --- a/ares/md/vdp-performance/dma.cpp +++ b/ares/md/vdp-performance/dma.cpp @@ -9,12 +9,11 @@ auto VDP::DMA::poll() -> void { } auto VDP::DMA::run() -> bool { - if(!io.enable || io.wait) return false; - if(!vdp.io.command.bit(5)) return false; - if(io.mode <= 1) return load(), true; - if(io.mode == 2) return fill(), true; - if(!vdp.io.command.bit(4)) return false; - if(io.mode == 3) return copy(), true; + if(vdp.command.pending && !io.wait) { + if(io.mode <= 1) return load(), true; + if(io.mode == 2) return fill(), true; + if(io.mode == 3) return copy(), true; + } return false; } @@ -27,37 +26,37 @@ auto VDP::DMA::load() -> void { io.source.bit(0,15)++; 
if(--io.length == 0) { - vdp.io.command.bit(5) = 0; + vdp.command.pending = 0; active = 0; } } auto VDP::DMA::fill() -> void { - switch(vdp.io.command.bit(0,3)) { - case 1: vdp.vram .writeByte(vdp.io.address, io.fill); break; - case 5: vdp.vsram.writeByte(vdp.io.address, io.fill); break; - case 3: vdp.cram .writeByte(vdp.io.address, io.fill); break; + switch(vdp.command.target) { + case 1: vdp.vram .writeByte(vdp.command.address, io.fill); break; + case 5: vdp.vsram.writeByte(vdp.command.address, io.fill); break; + case 3: vdp.cram .writeByte(vdp.command.address, io.fill); break; default: - debug(unusual, "[VDP] DMA::fill: io.command = 0b", binary(vdp.io.command, 6L)); + debug(unusual, "[VDP::DMA::fill]: command.target = 0x", hex(vdp.command.target)); break; } io.source.bit(0,15)++; - vdp.io.address += vdp.io.dataIncrement; + vdp.command.address += vdp.command.increment; if(--io.length == 0) { - vdp.io.command.bit(5) = 0; + vdp.command.pending = 0; } } //note: this can only copy to VRAM auto VDP::DMA::copy() -> void { auto data = vdp.vram.readByte(io.source); - vdp.vram.writeByte(vdp.io.address, data); + vdp.vram.writeByte(vdp.command.address, data); io.source.bit(0,15)++; - vdp.io.address += vdp.io.dataIncrement; + vdp.command.address += vdp.command.increment; if(--io.length == 0) { - vdp.io.command.bit(5) = 0; + vdp.command.pending = 0; } } diff --git a/ares/md/vdp-performance/io.cpp b/ares/md/vdp-performance/io.cpp index 665074b152..fdc070e588 100644 --- a/ares/md/vdp-performance/io.cpp +++ b/ares/md/vdp-performance/io.cpp @@ -18,7 +18,7 @@ auto VDP::read(n1 upper, n1 lower, n24 address, n16 data) -> n16 { if(io.interlaceMode.bit(1)) vcounter <<= 1; vcounter.bit(0) = vcounter.bit(8); } - return vcounter << 8 | (state.hdot >> 1) << 0; + return vcounter << 8 | hcounter() << 0; } //PSG @@ -62,38 +62,40 @@ auto VDP::write(n1 upper, n1 lower, n24 address, n16 data) -> void { // auto VDP::readDataPort() -> n16 { - io.commandPending = false; + command.latch = 0; + 
command.ready = 0; //VRAM read - if(io.command.bit(0,3) == 0) { - auto address = io.address.bit(1,16); + if(command.target == 0) { + auto address = command.address.bit(1,16); auto data = vram.read(address); - io.address += io.dataIncrement; + command.address += command.increment; return data; } //VSRAM read - if(io.command.bit(0,3) == 4) { - auto address = io.address.bit(1,6); + if(command.target == 4) { + auto address = command.address.bit(1,6); auto data = vsram.read(address); - io.address += io.dataIncrement; + command.address += command.increment; return data; } //CRAM read - if(io.command.bit(0,3) == 8) { - auto address = io.address.bit(1,6); + if(command.target == 8) { + auto address = command.address.bit(1,6); auto data = cram.read(address); - io.address += io.dataIncrement; + command.address += command.increment; return data.bit(0,2) << 1 | data.bit(3,5) << 5 | data.bit(6,8) << 9; } - debug(unusual, "[VDP] readDataPort: io.command = 0b", binary(io.command, 6L)); + debug(unusual, "[VDP::readDataPort]: command.target = 0x", hex(command.target)); return 0x0000; } auto VDP::writeDataPort(n16 data) -> void { - io.commandPending = false; + command.latch = 0; + command.ready = 0; //DMA VRAM fill if(dma.io.wait) { @@ -104,50 +106,50 @@ auto VDP::writeDataPort(n16 data) -> void { } //VRAM write - if(io.command.bit(0,3) == 1) { - auto address = io.address.bit(1,16); - if(io.address.bit(0)) data = data >> 8 | data << 8; + if(command.target == 1) { + auto address = command.address.bit(1,16); + if(command.address.bit(0)) data = data >> 8 | data << 8; vram.write(address, data); - io.address += io.dataIncrement; + command.address += command.increment; dma.poll(); return; } //VSRAM write - if(io.command.bit(0,3) == 5) { - auto address = io.address.bit(1,6); + if(command.target == 5) { + auto address = command.address.bit(1,6); //data format: ---- --yy yyyy yyyy vsram.write(address, data.bit(0,9)); - io.address += io.dataIncrement; + command.address += command.increment; 
return; } //CRAM write - if(io.command.bit(0,3) == 3) { - auto address = io.address.bit(1,6); + if(command.target == 3) { + auto address = command.address.bit(1,6); //data format: ---- bbb- ggg- rrr- cram.write(address, data.bit(1,3) << 0 | data.bit(5,7) << 3 | data.bit(9,11) << 6); - io.address += io.dataIncrement; + command.address += command.increment; return; } - debug(unusual, "[VDP] writeDataPort: io.command = 0b", binary(io.command, 6L)); + debug(unusual, "[VDP::writeDataPort]: command.target = 0x", hex(command.target)); } // auto VDP::readControlPort() -> n16 { - io.commandPending = false; + command.latch = 0; n16 result; result.bit( 0) = Region::PAL(); - result.bit( 1) = io.command.bit(5); //DMA active - result.bit( 2) = state.hcounter >= 1280; //horizontal blank - result.bit( 3) = state.vcounter >= screenHeight() || !io.displayEnable; //vertical blank - result.bit( 4) = io.interlaceMode.bit(0) && state.field; + result.bit( 1) = command.pending; + result.bit( 2) = hblank(); + result.bit( 3) = vblank() || !io.displayEnable; + result.bit( 4) = io.interlaceMode.bit(0) && field(); result.bit( 5) = 0; //SCOL result.bit( 6) = 0; //SOVR - result.bit( 7) = io.vblankIRQ; + result.bit( 7) = irq.vblank.pending; result.bit( 8) = 0; //FIFO full result.bit( 9) = 1; //FIFO empty result.bit(10) = 1; //constants (bits 10-15) @@ -161,24 +163,26 @@ auto VDP::readControlPort() -> n16 { auto VDP::writeControlPort(n16 data) -> void { //command write (lo) - if(io.commandPending) { - io.commandPending = false; + if(command.latch) { + command.latch = 0; - io.command.bit(2,5) = data.bit(4,7); - io.address.bit(14,16) = data.bit(0,2); + command.address.bit(14,16) = data.bit(0,2); + command.target.bit(2,3) = data.bit(4,5); + command.ready = data.bit(6) | command.target.bit(0); + command.pending = data.bit(7) & dma.io.enable; - if(!dma.io.enable) io.command.bit(5) = 0; - if(dma.io.mode == 3) dma.io.wait = false; + dma.io.wait = dma.io.mode == 2; dma.poll(); return; } - 
io.command.bit(0,1) = data.bit(14,15); - io.address.bit(0,13) = data.bit(0,13); + command.address.bit(0,13) = data.bit(0,13); + command.target.bit(0,1) = data.bit(14,15); + command.ready = 1; //command write (hi) if(data.bit(14,15) != 2) { - io.commandPending = true; + command.latch = 1; return; } @@ -188,21 +192,25 @@ auto VDP::writeControlPort(n16 data) -> void { //mode register 1 case 0x00: { io.displayOverlayEnable = data.bit(0); - io.counterLatch = data.bit(1); - io.horizontalBlankInterruptEnable = data.bit(4); - io.leftColumnBlank = data.bit(5); + io.counterLatch = data.bit(1); + io.videoMode4 = data.bit(2); + irq.hblank.enable = data.bit(4); + io.leftColumnBlank = data.bit(5); + + irq.poll(); return; } //mode register 2 case 0x01: { - io.videoMode = data.bit(2); - io.overscan = data.bit(3); - dma.io.enable = data.bit(4); - io.verticalBlankInterruptEnable = data.bit(5); - io.displayEnable = data.bit(6); - vram.mode = data.bit(7); - if(!dma.io.enable) io.command.bit(5) = 0; + io.videoMode5 = data.bit(2); + io.overscan = data.bit(3); + dma.io.enable = data.bit(4); + irq.vblank.enable = data.bit(5); + io.displayEnable = data.bit(6); + vram.mode = data.bit(7); + + irq.poll(); dma.poll(); return; } @@ -245,7 +253,7 @@ auto VDP::writeControlPort(n16 data) -> void { //horizontal interrupt counter case 0x0a: { - io.horizontalInterruptCounter = data.bit(0,7); + irq.hblank.frequency = data.bit(0,7); return; } @@ -253,21 +261,23 @@ auto VDP::writeControlPort(n16 data) -> void { case 0x0b: { planeA.io.horizontalScrollMode = data.bit(0,1); planeB.io.horizontalScrollMode = data.bit(0,1); - planeA.io.verticalScrollMode = data.bit(2); - planeB.io.verticalScrollMode = data.bit(2); - io.externalInterruptEnable = data.bit(3); + planeA.io.verticalScrollMode = data.bit(2); + planeB.io.verticalScrollMode = data.bit(2); + irq.external.enable = data.bit(3); + + irq.poll(); return; } //mode register 4 case 0x0c: { - io.displayWidth = data.bit(0); - io.interlaceMode = data.bit(1,2); + 
io.displayWidth = data.bit(0); + io.interlaceMode = data.bit(1,2); io.shadowHighlightEnable = data.bit(3); - io.externalColorEnable = data.bit(4); - io.horizontalSync = data.bit(5); - io.verticalSync = data.bit(6); - io.clockSelect = data.bit(7); + io.externalColorEnable = data.bit(4); + io.hsync = data.bit(5); + io.vsync = data.bit(6); + io.clockSelect = data.bit(7); return; } @@ -289,14 +299,14 @@ auto VDP::writeControlPort(n16 data) -> void { //data port auto-increment value case 0x0f: { - io.dataIncrement = data.bit(0,7); + command.increment = data.bit(0,7); return; } //plane size case 0x10: { - planeA.io.nametableWidth = data.bit(0,1); - planeB.io.nametableWidth = data.bit(0,1); + planeA.io.nametableWidth = data.bit(0,1); + planeB.io.nametableWidth = data.bit(0,1); planeA.io.nametableHeight = data.bit(4,5); planeB.io.nametableHeight = data.bit(4,5); return; @@ -304,14 +314,14 @@ auto VDP::writeControlPort(n16 data) -> void { //window plane horizontal position case 0x11: { - window.io.horizontalOffset = data.bit(0,4) << 4; + window.io.horizontalOffset = data.bit(0,4) << 4; window.io.horizontalDirection = data.bit(7); return; } //window plane vertical position case 0x12: { - window.io.verticalOffset = data.bit(0,4) << 3; + window.io.verticalOffset = data.bit(0,4) << 3; window.io.verticalDirection = data.bit(7); return; } @@ -343,8 +353,8 @@ auto VDP::writeControlPort(n16 data) -> void { //DMA source case 0x17: { dma.io.source.bit(16,21) = data.bit(0,5); - dma.io.mode = data.bit(6,7); - dma.io.wait = dma.io.mode.bit(1); + dma.io.mode = data.bit(6,7); + dma.io.wait = dma.io.mode.bit(1); dma.poll(); return; } diff --git a/ares/md/vdp-performance/irq.cpp b/ares/md/vdp-performance/irq.cpp new file mode 100644 index 0000000000..a103e4f61b --- /dev/null +++ b/ares/md/vdp-performance/irq.cpp @@ -0,0 +1,22 @@ +auto VDP::IRQ::poll() -> void { + if(external.enable && external.pending) { + external.pending = 0; + cpu.raise(CPU::Interrupt::External); + } + + if(hblank.enable 
&& hblank.pending) { + hblank.pending = 0; + cpu.raise(CPU::Interrupt::HorizontalBlank); + } + + if(vblank.enable && vblank.pending) { + vblank.pending = 0; + cpu.raise(CPU::Interrupt::VerticalBlank); + } +} + +auto VDP::IRQ::power(bool reset) -> void { + external = {}; + hblank = {}; + vblank = {}; +} diff --git a/ares/md/vdp-performance/main.cpp b/ares/md/vdp-performance/main.cpp new file mode 100644 index 0000000000..fa0ef83294 --- /dev/null +++ b/ares/md/vdp-performance/main.cpp @@ -0,0 +1,90 @@ +auto VDP::step(u32 clocks) -> void { + Thread::step(clocks); + Thread::synchronize(cpu, apu); +} + +auto VDP::vtick() -> void { + if(vblank()) { + irq.hblank.counter = irq.hblank.frequency; + } else if(irq.hblank.counter-- == 0) { + irq.hblank.counter = irq.hblank.frequency; + irq.hblank.pending = 1; + irq.poll(); + } + + state.vcounter++; + if(v28()) { + if(vcounter() == 0x0e0) vblank(1); + if(vcounter() == 0x0eb && Region::NTSC()) state.vcounter = 0x1e5; + if(vcounter() == 0x103 && Region::PAL ()) state.vcounter = 0x1ca; + if(vcounter() == 0x1ff) vblank(0); + } + if(v30()) { + if(vcounter() == 0x0f0) vblank(1); + if(vcounter() == 0x200 && Region::NTSC()) state.vcounter = 0x000; + if(vcounter() == 0x10b && Region::PAL ()) state.vcounter = 0x1d2; + if(vcounter() == 0x1ff) vblank(0); + } +} + +auto VDP::hblank(bool line) -> void { + state.hblank = line; + if(hblank() == 0) { + cartridge.hblank(0); + } else { + cartridge.hblank(1); + apu.setINT(0); //timing hack + } +} + +auto VDP::vblank(bool line) -> void { + state.vblank = line; + irq.vblank.transitioned = 1; +} + +auto VDP::vedge() -> void { + if(!irq.vblank.transitioned) return; + irq.vblank.transitioned = 0; + + if(vblank() == 0) { + cartridge.vblank(0); + //apu.setINT(0); + } else { + cartridge.vblank(1); + apu.setINT(1); + irq.vblank.pending = 1; + irq.poll(); + } +} + +auto VDP::main() -> void { + if(hcounter() == 0) { + latch.displayWidth = io.displayWidth; + latch.clockSelect = io.clockSelect; + + step(512); + 
state.hcounter = 0x80; + } else if(hcounter() == 0x80) { + if(vcounter() < screenHeight() && !runAhead()) { + render(); + m32x.vdp.scanline(pixels(), state.vcounter); + } + + step(768); + state.hcounter = h32() ? 0xe9 : 0xe4; + hblank(1); + vtick(); + } else { + step(430); + state.hcounter = 0; + hblank(0); + vedge(); + + if(vcounter() == 0) { + state.field ^= 1; + latch.interlace = io.interlaceMode == 3; + latch.overscan = io.overscan; + frame(); + } + } +} diff --git a/ares/md/vdp-performance/serialization.cpp b/ares/md/vdp-performance/serialization.cpp index bdb7e5d929..a5cba85bd9 100644 --- a/ares/md/vdp-performance/serialization.cpp +++ b/ares/md/vdp-performance/serialization.cpp @@ -1,50 +1,50 @@ auto VDP::serialize(serializer& s) -> void { Thread::serialize(s); + s(vram); s(vsram); s(cram); s(psg); + s(irq); s(dma); s(planeA); s(window); s(planeB); s(sprite); - s(state.hdot); - s(state.hcounter); - s(state.vcounter); - s(state.field); + s(command.latch); + s(command.target); + s(command.ready); + s(command.pending); + s(command.address); + s(command.increment); - s(io.vblankIRQ); - s(io.command); - s(io.address); - s(io.commandPending); s(io.displayOverlayEnable); s(io.counterLatch); - s(io.horizontalBlankInterruptEnable); + s(io.videoMode4); s(io.leftColumnBlank); - s(io.videoMode); + s(io.videoMode5); s(io.overscan); - s(io.verticalBlankInterruptEnable); s(io.displayEnable); s(io.backgroundColor); - s(io.horizontalInterruptCounter); - s(io.externalInterruptEnable); s(io.displayWidth); s(io.interlaceMode); s(io.shadowHighlightEnable); s(io.externalColorEnable); - s(io.horizontalSync); - s(io.verticalSync); + s(io.hsync); + s(io.vsync); s(io.clockSelect); - s(io.dataIncrement); - s(latch.field); s(latch.interlace); s(latch.overscan); - s(latch.horizontalInterruptCounter); s(latch.displayWidth); s(latch.clockSelect); + + s(state.hcounter); + s(state.vcounter); + s(state.field); + s(state.hblank); + s(state.vblank); } auto VDP::PSG::serialize(serializer& s) -> 
void { @@ -52,6 +52,18 @@ auto VDP::PSG::serialize(serializer& s) -> void { Thread::serialize(s); } +auto VDP::IRQ::serialize(serializer& s) -> void { + s(external.enable); + s(external.pending); + s(hblank.enable); + s(hblank.pending); + s(hblank.counter); + s(hblank.frequency); + s(vblank.enable); + s(vblank.pending); + s(vblank.transitioned); +} + auto VDP::VRAM::serialize(serializer& s) -> void { s(pixels); s(memory); diff --git a/ares/md/vdp-performance/vdp.cpp b/ares/md/vdp-performance/vdp.cpp index 73baf7c0a2..2814bf6511 100644 --- a/ares/md/vdp-performance/vdp.cpp +++ b/ares/md/vdp-performance/vdp.cpp @@ -4,6 +4,8 @@ namespace ares::MegaDrive { VDP vdp; #include "psg.cpp" +#include "main.cpp" +#include "irq.cpp" #include "render.cpp" #include "memory.cpp" #include "io.cpp" @@ -44,69 +46,12 @@ auto VDP::unload() -> void { node.reset(); } -auto VDP::main() -> void { - if(state.vcounter < screenHeight()) { - step(1280); - if(!runAhead()) { - render(); - m32x.vdp.scanline(pixels(), state.vcounter); - } - if(latch.horizontalInterruptCounter-- == 0) { - latch.horizontalInterruptCounter = io.horizontalInterruptCounter; - if(io.horizontalBlankInterruptEnable) { - cpu.raise(CPU::Interrupt::HorizontalBlank); - } - } - cartridge.hblank(1); - step(430); - } else if(state.vcounter == screenHeight()) { - if(io.verticalBlankInterruptEnable) { - io.vblankIRQ = true; - cpu.raise(CPU::Interrupt::VerticalBlank); - } - cartridge.vblank(1); - apu.setINT(true); - step(1286); - cartridge.hblank(1); - apu.setINT(false); - step(424); - } else { - step(1280); - cartridge.hblank(1); - step(430); - } - - cpu.lower(CPU::Interrupt::HorizontalBlank); - cartridge.hblank(0); - - state.hdot = 0; - state.hcounter = 0; - state.vcounter++; - - if(state.vcounter == 240) { - if(latch.interlace == 0) screen->setProgressive(1); - if(latch.interlace == 1) screen->setInterlace(latch.field); - screen->setViewport(0, 0, screen->width(), screen->height()); - screen->frame(); - 
scheduler.exit(Event::Frame); - } else if(state.vcounter >= frameHeight()) { - state.vcounter = 0; - state.field ^= 1; - latch.field = state.field; - latch.interlace = io.interlaceMode == 3; - latch.overscan = io.overscan; - latch.displayWidth = io.displayWidth; - latch.horizontalInterruptCounter = io.horizontalInterruptCounter; - io.vblankIRQ = false; - cpu.lower(CPU::Interrupt::VerticalBlank); - cartridge.vblank(0); - } -} - -auto VDP::step(s32 clocks) -> void { - state.hcounter += clocks; - Thread::step(clocks); - Thread::synchronize(cpu, apu); +auto VDP::frame() -> void { + if(latch.interlace == 0) screen->setProgressive(1); + if(latch.interlace == 1) screen->setInterlace(field()); + screen->setViewport(0, 0, screen->width(), screen->height()); + screen->frame(); + scheduler.exit(Event::Frame); } auto VDP::power(bool reset) -> void { @@ -123,6 +68,7 @@ auto VDP::power(bool reset) -> void { for(auto& data : cram.memory) data = 0; psg.power(reset); + irq.power(reset); dma.power(); planeA.io = {}; @@ -133,9 +79,10 @@ auto VDP::power(bool reset) -> void { for(auto& object : sprite.oam) object = {}; for(auto& object : sprite.objects) object = {}; - state = {}; + command = {}; io = {}; latch = {}; + state = {}; } } diff --git a/ares/md/vdp-performance/vdp.hpp b/ares/md/vdp-performance/vdp.hpp index f0ed3e458d..34e24359fc 100644 --- a/ares/md/vdp-performance/vdp.hpp +++ b/ares/md/vdp-performance/vdp.hpp @@ -17,19 +17,30 @@ struct VDP : Thread { auto hcounter() const -> u32 { return state.hcounter; } auto vcounter() const -> u32 { return state.vcounter; } + auto field() const -> bool { return state.field; } + auto hblank() const -> bool { return state.hblank; } + auto vblank() const -> bool { return state.vblank; } auto refreshing() const -> bool { return false; } - //overrides Thread::active() for VDP DMA wait cycle detection: - //this is needed as vdp-performace runs VDP DMA from CPU thread - auto active() const -> bool { return dma.active; } + auto h32() const -> 
bool { return latch.displayWidth == 0; } //256-width + auto h40() const -> bool { return latch.displayWidth == 1; } //320-width + + auto v28() const -> bool { return io.overscan == 0; } //224-height + auto v30() const -> bool { return io.overscan == 1; } //240-height //vdp.cpp auto load(Node::Object) -> void; auto unload() -> void; + auto frame() -> void; + auto power(bool reset) -> void; + //main.cpp + auto step(u32 clocks) -> void; + auto vtick() -> void; + auto hblank(bool line) -> void; + auto vblank(bool line) -> void; + auto vedge() -> void; auto main() -> void; - auto step(s32 clocks) -> void; - auto power(bool reset) -> void; //render.cpp auto pixels() -> u32*; @@ -73,6 +84,33 @@ struct VDP : Thread { double volume[16]; } psg; + struct IRQ { + //irq.cpp + auto poll() -> void; + auto power(bool reset) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct External { + n1 enable; + n1 pending; + } external; + + struct Hblank { + n1 enable; + n1 pending; + n8 counter; + n8 frequency; + } hblank; + + struct Vblank { + n1 enable; + n1 pending; + n1 transitioned; + } vblank; + } irq; + struct DMA { //dma.cpp auto poll() -> void; @@ -239,71 +277,58 @@ struct VDP : Thread { Pixel pixels[512]; } sprite{*this}; - struct State { - u32* output = nullptr; - n16 hdot; - n16 hcounter; - n16 vcounter; - n1 field; - } state; - - struct IO { - //status - n1 vblankIRQ; //true after VIRQ triggers; cleared at start of next frame - - //command - n6 command; + struct Command { + n1 latch; + n4 target; + n1 ready; + n1 pending; n17 address; - n1 commandPending; + n8 increment; + } command; + struct IO { //$00 mode register 1 - n1 displayOverlayEnable; - n1 counterLatch; - n1 horizontalBlankInterruptEnable; - n1 leftColumnBlank; + n1 displayOverlayEnable; + n1 counterLatch; + n1 videoMode4; + n1 leftColumnBlank; //$01 mode register 2 - n1 videoMode; //0 = Master System; 1 = Mega Drive - n1 overscan; //0 = 224 lines; 1 = 240 lines - n1 
verticalBlankInterruptEnable; - n1 displayEnable; + n1 videoMode5; + n1 overscan; + n1 displayEnable; //$07 background color - n6 backgroundColor; - - //$0a horizontal interrupt counter - n8 horizontalInterruptCounter; - - //$0b mode register 3 - n1 externalInterruptEnable; + n6 backgroundColor; //$0c mode register 4 - n1 displayWidth; //0 = H32; 1 = H40 - n2 interlaceMode; - n1 shadowHighlightEnable; - n1 externalColorEnable; - n1 horizontalSync; - n1 verticalSync; - n1 clockSelect; //0 = DCLK; 1 = EDCLK - - //$0f data port auto-increment value - n8 dataIncrement; + n1 displayWidth; //0 = H32; 1 = H40 + n2 interlaceMode; + n1 shadowHighlightEnable; + n1 externalColorEnable; + n1 hsync; + n1 vsync; + n1 clockSelect; //0 = DCLK; 1 = EDCLK } io; struct Latch { //per-frame - n1 field; - n1 interlace; - n1 overscan; - n8 horizontalInterruptCounter; + n1 interlace; + n1 overscan; //per-scanline - n1 displayWidth; - n1 clockSelect; + n1 displayWidth; + n1 clockSelect; } latch; - friend class CPU; - friend class APU; + struct State { + u32* output = nullptr; + n8 hcounter; + n9 vcounter; + n1 field; + n1 hblank; + n1 vblank; + } state; }; extern VDP vdp; diff --git a/ares/md/vdp/debugger.cpp b/ares/md/vdp/debugger.cpp index 2de0906bfe..7b5010fc2d 100644 --- a/ares/md/vdp/debugger.cpp +++ b/ares/md/vdp/debugger.cpp @@ -39,21 +39,37 @@ auto VDP::Debugger::unload() -> void { memory.cram.reset(); } -auto VDP::Debugger::interrupt(string_view type) -> void { - if(tracer.interrupt->enabled()) { - string message = {type, " SR=", cpu.r.i, " @ ", vdp.vcounter(), ",", vdp.hcounter()}; +auto VDP::Debugger::interrupt(CPU::Interrupt type) -> void { + if(unlikely(tracer.interrupt->enabled())) { + string name; + if(type == CPU::Interrupt::External ) name = "external"; + if(type == CPU::Interrupt::HorizontalBlank) name = "hblank"; + if(type == CPU::Interrupt::VerticalBlank ) name = "vblank"; + string message = {name, " SR=", cpu.r.i, " @ ", vdp.vcounter(), ",", vdp.hcounter()}; 
tracer.interrupt->notify(message); } } -auto VDP::Debugger::dma(string_view line) -> void { - if(tracer.dma->enabled()) { - tracer.dma->notify(line); +auto VDP::Debugger::dmaLoad(n24 source, n4 target, n17 address, n16 data) -> void { + if(unlikely(tracer.dma->enabled())) { + tracer.dma->notify({"load(", hex(source, 6L), ", ", hex(target, 1L), ":", hex(address, 5L), ", ", hex(data, 4L), ")"}); + } +} + +auto VDP::Debugger::dmaFill(n4 target, n17 address, n16 data) -> void { + if(unlikely(tracer.dma->enabled())) { + tracer.dma->notify({"fill(", hex(target, 1L), ":", hex(address, 5L), ", ", hex(data, 4L), ")"}); + } +} + +auto VDP::Debugger::dmaCopy(n22 source, n4 target, n17 address, n16 data) -> void { + if(unlikely(tracer.dma->enabled())) { + tracer.dma->notify({"copy(", hex(source, 6L), ", ", hex(target, 1L), ":", hex(address, 5L), ", ", hex(data, 4L), ")"}); } } auto VDP::Debugger::io(n5 register, n8 data) -> void { - if(tracer.io->enabled()) { + if(unlikely(tracer.io->enabled())) { static const string name[32] = { /* $00 */ "mode register 1", /* $01 */ "mode register 2", diff --git a/ares/md/vdp/dma.cpp b/ares/md/vdp/dma.cpp index 272eced644..d4ff5c3b5a 100644 --- a/ares/md/vdp/dma.cpp +++ b/ares/md/vdp/dma.cpp @@ -23,6 +23,7 @@ auto VDP::DMA::load() -> void { auto address = mode.bit(0) << 23 | source << 1; auto data = bus.read(1, 1, address); vdp.writeDataPort(data); + vdp.debugger.dmaLoad(address, vdp.command.target, vdp.command.address, data); source.bit(0,15)++; if(--length == 0) { @@ -38,6 +39,7 @@ auto VDP::DMA::fill() -> void { case 3: vdp.cram.write(vdp.command.address, data); break; case 5: vdp.vsram.write(vdp.command.address, data); break; } + vdp.debugger.dmaFill(vdp.command.target, vdp.command.address, data); source.bit(0,15)++; vdp.command.address += vdp.command.increment; @@ -57,6 +59,7 @@ auto VDP::DMA::copy() -> void { read = 0; vdp.vram.writeByte(vdp.command.address, data); + vdp.debugger.dmaCopy(source, vdp.command.target, vdp.command.address, 
data); source.bit(0,15)++; vdp.command.address += vdp.command.increment; diff --git a/ares/md/vdp/fifo.cpp b/ares/md/vdp/fifo.cpp index d94e2a1a1d..a1cb261648 100644 --- a/ares/md/vdp/fifo.cpp +++ b/ares/md/vdp/fifo.cpp @@ -44,6 +44,7 @@ auto VDP::FIFO::run() -> bool { slots[0].lower = 0; slots[0].upper = 0; debug(unusual, "[VDP::FIFO] write target = 0x", hex(slots[0].target)); + cpu.debugger.interrupt({"VDP FIFO ", hex(slots[0].target)}); return advance(), true; } diff --git a/ares/md/vdp/io.cpp b/ares/md/vdp/io.cpp index a5803ee43f..6c93af3c6f 100644 --- a/ares/md/vdp/io.cpp +++ b/ares/md/vdp/io.cpp @@ -281,6 +281,8 @@ auto VDP::writeControlPort(n16 data) -> void { layers.hscrollMode = data.bit(0,1); layers.vscrollMode = data.bit(2); irq.external.enable = data.bit(3); + + irq.poll(); return; } diff --git a/ares/md/vdp/irq.cpp b/ares/md/vdp/irq.cpp index acee2f5dcb..bfb1acc98a 100644 --- a/ares/md/vdp/irq.cpp +++ b/ares/md/vdp/irq.cpp @@ -1,18 +1,18 @@ auto VDP::IRQ::poll() -> void { if(external.enable && external.pending) { - vdp.debugger.interrupt("External"); + vdp.debugger.interrupt(CPU::Interrupt::External); external.pending = 0; cpu.raise(CPU::Interrupt::External); } if(hblank.enable && hblank.pending) { - vdp.debugger.interrupt("Hblank"); + vdp.debugger.interrupt(CPU::Interrupt::HorizontalBlank); hblank.pending = 0; cpu.raise(CPU::Interrupt::HorizontalBlank); } if(vblank.enable && vblank.pending) { - vdp.debugger.interrupt("Vblank"); + vdp.debugger.interrupt(CPU::Interrupt::VerticalBlank); vblank.pending = 0; cpu.raise(CPU::Interrupt::VerticalBlank); } diff --git a/ares/md/vdp/main.cpp b/ares/md/vdp/main.cpp index 820d7045ae..39df20c9a6 100644 --- a/ares/md/vdp/main.cpp +++ b/ares/md/vdp/main.cpp @@ -81,9 +81,9 @@ auto VDP::slot() -> void { state.refreshing = 0; return; } - if(dma.run()); if(fifo.run()) return; if(prefetch.run()) return; + dma.run(); } auto VDP::refresh() -> void { diff --git a/ares/md/vdp/vdp.hpp b/ares/md/vdp/vdp.hpp index 
4f7aab5af0..d850be14c6 100644 --- a/ares/md/vdp/vdp.hpp +++ b/ares/md/vdp/vdp.hpp @@ -14,8 +14,10 @@ struct VDP : Thread { //debugger.cpp auto load(Node::Object) -> void; auto unload() -> void; - auto interrupt(string_view) -> void; - auto dma(string_view) -> void; + auto interrupt(CPU::Interrupt) -> void; + auto dmaLoad(n24 source, n4 target, n17 address, n16 data) -> void; + auto dmaFill(n4 target, n17 address, n16 data) -> void; + auto dmaCopy(n22 source, n4 target, n17 address, n16 data) -> void; auto io(n5 register, n8 data) -> void; struct Memory { diff --git a/ares/n64/memory/lsb-readable.hpp b/ares/n64/memory/lsb/readable.hpp similarity index 89% rename from ares/n64/memory/lsb-readable.hpp rename to ares/n64/memory/lsb/readable.hpp index f56ddd838b..ca5df2052d 100644 --- a/ares/n64/memory/lsb-readable.hpp +++ b/ares/n64/memory/lsb/readable.hpp @@ -15,7 +15,7 @@ struct Readable { auto allocate(u32 capacity, u32 fillWith = ~0) -> void { reset(); - size = capacity & ~15; + size = capacity & ~7; u32 mask = bit::round(size) - 1; maskByte = mask & ~0; maskHalf = mask & ~1; @@ -44,17 +44,12 @@ struct Readable { } } - //N64 CPU requires aligned memory accesses template auto read(u32 address) -> u64 { if constexpr(Size == Byte) return *(u8* )&data[address & maskByte ^ 3]; if constexpr(Size == Half) return *(u16*)&data[address & maskHalf ^ 2]; if constexpr(Size == Word) return *(u32*)&data[address & maskWord ^ 0]; - if constexpr(Size == Dual) { - u64 upper = read(address + 0); - u64 lower = read(address + 4); - return upper << 32 | lower << 0; - } + if constexpr(Size == Dual) return *(u64*)&data[address & maskDual ^ 0]; unreachable; } @@ -62,7 +57,6 @@ struct Readable { auto write(u32 address, u64 value) -> void { } - //N64 RSP allows unaligned memory accesses in certain cases template auto readUnaligned(u32 address) -> u64 { static_assert(Size != Byte); diff --git a/ares/n64/memory/lsb-writable.hpp b/ares/n64/memory/lsb/writable.hpp similarity index 87% rename from 
ares/n64/memory/lsb-writable.hpp rename to ares/n64/memory/lsb/writable.hpp index a6a60ef419..6277978cb9 100644 --- a/ares/n64/memory/lsb-writable.hpp +++ b/ares/n64/memory/lsb/writable.hpp @@ -15,7 +15,7 @@ struct Writable { auto allocate(u32 capacity, u32 fillWith = ~0) -> void { reset(); - size = capacity & ~15; + size = capacity & ~7; u32 mask = bit::round(size) - 1; maskByte = mask & ~0; maskHalf = mask & ~1; @@ -44,17 +44,12 @@ struct Writable { } } - //N64 CPU requires aligned memory accesses template auto read(u32 address) -> u64 { if constexpr(Size == Byte) return *(u8* )&data[address & maskByte ^ 3]; if constexpr(Size == Half) return *(u16*)&data[address & maskHalf ^ 2]; if constexpr(Size == Word) return *(u32*)&data[address & maskWord ^ 0]; - if constexpr(Size == Dual) { - u64 upper = read(address + 0); - u64 lower = read(address + 4); - return upper << 32 | lower << 0; - } + if constexpr(Size == Dual) return *(u64*)&data[address & maskDual ^ 0]; unreachable; } @@ -63,13 +58,9 @@ struct Writable { if constexpr(Size == Byte) *(u8* )&data[address & maskByte ^ 3] = value; if constexpr(Size == Half) *(u16*)&data[address & maskHalf ^ 2] = value; if constexpr(Size == Word) *(u32*)&data[address & maskWord ^ 0] = value; - if constexpr(Size == Dual) { - write(address + 0, value >> 32); - write(address + 4, value >> 0); - } + if constexpr(Size == Dual) *(u64*)&data[address & maskDual ^ 0] = value; } - //N64 RSP allows unaligned memory accesses in certain cases template auto readUnaligned(u32 address) -> u64 { static_assert(Size != Byte); diff --git a/ares/n64/memory/memory.hpp b/ares/n64/memory/memory.hpp index f8cb6b30cc..a0de2560f8 100644 --- a/ares/n64/memory/memory.hpp +++ b/ares/n64/memory/memory.hpp @@ -1,6 +1,6 @@ namespace Memory { - #include "lsb-readable.hpp" - #include "lsb-writable.hpp" + #include "lsb/readable.hpp" + #include "lsb/writable.hpp" #include "io.hpp" } diff --git a/ares/n64/memory/msb-readable.hpp b/ares/n64/memory/msb/readable.hpp 
similarity index 94% rename from ares/n64/memory/msb-readable.hpp rename to ares/n64/memory/msb/readable.hpp index 8e9d800020..1e38e30771 100644 --- a/ares/n64/memory/msb-readable.hpp +++ b/ares/n64/memory/msb/readable.hpp @@ -15,7 +15,7 @@ struct Readable { auto allocate(u32 capacity, u32 fillWith = ~0) -> void { reset(); - size = capacity & ~15; + size = capacity & ~7; u32 mask = bit::round(size) - 1; maskByte = mask & ~0; maskHalf = mask & ~1; @@ -44,7 +44,6 @@ struct Readable { } } - //N64 CPU requires aligned memory accesses template auto read(u32 address) -> u64 { if constexpr(Size == Byte) return (*(u8* )&data[address & maskByte]); @@ -58,7 +57,6 @@ struct Readable { auto write(u32 address, u64 value) -> void { } - //N64 RSP allows unaligned memory accesses in certain cases template auto readUnaligned(u32 address) -> u64 { static_assert(Size != Byte); diff --git a/ares/n64/memory/msb-writable.hpp b/ares/n64/memory/msb/writable.hpp similarity index 93% rename from ares/n64/memory/msb-writable.hpp rename to ares/n64/memory/msb/writable.hpp index 39ab4f39fa..851f0f681d 100644 --- a/ares/n64/memory/msb-writable.hpp +++ b/ares/n64/memory/msb/writable.hpp @@ -15,13 +15,13 @@ struct Writable { auto allocate(u32 capacity, u32 fillWith = ~0) -> void { reset(); - size = capacity & ~15; + size = capacity & ~7; u32 mask = bit::round(size) - 1; maskByte = mask & ~0; maskHalf = mask & ~1; maskWord = mask & ~3; maskDual = mask & ~7; - data = memory::alocate(mask + 1); + data = memory::allocate(mask + 1); fill(fillWith); } @@ -44,7 +44,6 @@ struct Writable { } } - //N64 CPU requires aligned memory accesses template auto read(u32 address) -> u64 { if constexpr(Size == Byte) return (*(u8* )&data[address & maskByte]); @@ -61,7 +60,6 @@ struct Writable { if constexpr(Size == Dual) *(u64*)&data[address & maskDual] = bswap64(value); } - //N64 RSP allows unaligned memory accesses in certain cases template auto readUnaligned(u32 address) -> u64 { static_assert(Size != Byte); diff 
--git a/ares/ps1/memory/bus.hpp b/ares/ps1/memory/bus.hpp index 2f780b583a..5ec62cace1 100644 --- a/ares/ps1/memory/bus.hpp +++ b/ares/ps1/memory/bus.hpp @@ -1,5 +1,7 @@ inline auto Bus::mmio(u32 address) -> Memory::Interface& { address &= 0x1fff'ffff; + if(address <= 0x007f'ffff) return cpu.ram; + if(address >= 0x1fc0'0000) return bios; if(address <= 0x1eff'ffff) return unmapped; if(address <= 0x1f7f'ffff) return expansion1; if(address <= 0x1f80'03ff) return cpu.scratchpad; @@ -24,19 +26,11 @@ inline auto Bus::mmio(u32 address) -> Memory::Interface& { template inline auto Bus::read(u32 address) -> u32 { - address &= 0x1fff'ffff; - if(address <= 0x007f'ffff) return cpu.ram.read(address); - if(address >= 0x1fc0'0000) return bios.read(address); return mmio(address).read(address); } template inline auto Bus::write(u32 address, u32 data) -> void { - address &= 0x1fff'ffff; - if(address <= 0x007f'ffff) { - if constexpr(Accuracy::CPU::Recompiler) cpu.recompiler.invalidate(address); - return cpu.ram.write(address, data); - } - if(address >= 0x1fc0'0000) return bios.write(address, data); + if constexpr(Accuracy::CPU::Recompiler) cpu.recompiler.invalidate(address); return mmio(address).write(address, data); }