From 3d857528fade1d3b5b7ef10699857c029dc08f81 Mon Sep 17 00:00:00 2001 From: Ilkka Lehtoranta Date: Wed, 24 Jun 2026 14:22:05 +0300 Subject: [PATCH 1/2] Reshape blitter paula and copper DMA latches --- .../AmigaBlitterConformanceMatrixTests.cs | 100 ++++ .../AmigaCopperConformanceMatrixTests.cs | 26 + CopperMod.Amiga.Tests/PaulaTests.cs | 181 ++++++- CopperMod.Amiga/Blitter.cs | 226 +++++---- CopperMod.Amiga/OcsDisplay.cs | 400 ++++++++++------ CopperMod.Amiga/Paula.cs | 446 +++++++++++++++--- 6 files changed, 1083 insertions(+), 296 deletions(-) diff --git a/CopperMod.Amiga.Tests/AmigaBlitterConformanceMatrixTests.cs b/CopperMod.Amiga.Tests/AmigaBlitterConformanceMatrixTests.cs index 72acbc1..7f8cce9 100644 --- a/CopperMod.Amiga.Tests/AmigaBlitterConformanceMatrixTests.cs +++ b/CopperMod.Amiga.Tests/AmigaBlitterConformanceMatrixTests.cs @@ -433,6 +433,55 @@ public void BlitterAreaModeUsesHrmNoContentionSlotTiming(object rowObject) Assert.Equal(expectedCompletion - AgnusChipSlotScheduler.SlotCycles, blitterDma[^1].GrantedCycle); } + [Fact] + public void BlitterAreaDmaLoadsSourceLatchesBeforeDestinationCommit() + { + var bus = new AmigaBus(captureBusAccesses: true); + WriteWord(bus, SourceA, 0xFF00); + WriteWord(bus, SourceB, 0x0F0F); + WriteWord(bus, SourceC, 0x3333); + ConfigureAreaBlit(bus, 0x0FCA); + EnableBlitterDma(bus); + + bus.WriteWord(0x00DFF058, 0x0041); + RunBlitterUntilIdle(bus); + + var blitterDma = bus.BusAccesses + .Where(access => access.Request.Requester == AmigaBusRequester.Blitter && + access.Request.Kind == AmigaBusAccessKind.Blitter) + .ToArray(); + + Assert.Collection( + blitterDma, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(SourceA, access.Request.Address); + }, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(SourceB, access.Request.Address); + }, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(SourceC, access.Request.Address); + }, + access => + { + Assert.True(access.Request.IsWrite); + Assert.Equal(DestinationD, access.Request.Address); + }); + Assert.True(blitterDma[0].CompletedCycle <= blitterDma[1].RequestedCycle); + Assert.True(blitterDma[1].CompletedCycle <= blitterDma[2].RequestedCycle); + Assert.True(blitterDma[2].CompletedCycle <= blitterDma[3].RequestedCycle); + Assert.Equal(SourceA + 2, bus.Blitter.CaptureSnapshot().SourceA); + Assert.Equal(SourceB + 2, bus.Blitter.CaptureSnapshot().SourceB); + Assert.Equal(SourceC + 2, bus.Blitter.CaptureSnapshot().SourceC); + Assert.Equal(DestinationD + 2, bus.Blitter.CaptureSnapshot().DestinationD); + } + [Fact] public void BlitterBusyClearsWhenBitplaneDmaDelaysFinalWriteSlot() { @@ -609,6 +658,57 @@ public void BlitterLineModeUsesHrmIdleCIdleDSlotCadence() } } + [Fact] + public void BlitterLineDmaLoadsPatternAndSourceBeforeDestinationCommit() + { + var bus = new AmigaBus(captureBusAccesses: true); + var baseAddress = DestinationD + 0x1B00; + WriteWord(bus, SourceB, 0x8000); + WriteWord(bus, baseAddress, 0x0000); + ConfigureLineBlit( + bus, + baseAddress, + LineRowStride, + bltcon1: 0x0001, + bModulo: 2, + channelMask: 0x0F00); + + bus.WriteWord(0x00DFF058, 0x0041); + RunBlitterUntilIdle(bus); + + var blitterDma = bus.BusAccesses + .Where(access => access.Request.Requester == AmigaBusRequester.Blitter && + access.Request.Kind == AmigaBusAccessKind.Blitter) + .ToArray(); + + Assert.Collection( + blitterDma, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(SourceB, access.Request.Address); + }, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(SourceB, access.Request.Address); + }, + access => + { + Assert.False(access.Request.IsWrite); + Assert.Equal(baseAddress, access.Request.Address); + }, + access => + { + Assert.True(access.Request.IsWrite); + Assert.Equal(baseAddress, access.Request.Address); + }); + Assert.True(blitterDma[0].CompletedCycle <= blitterDma[1].RequestedCycle); + Assert.True(blitterDma[1].CompletedCycle <= blitterDma[2].RequestedCycle); + Assert.True(blitterDma[2].CompletedCycle <= blitterDma[3].RequestedCycle); + Assert.True(IsLinePixelSet(bus, baseAddress, LineRowStride, 0, 0)); + } + [Fact] public void BlitterLineBusyClearsAtFinalWriteCompletionWithoutPolling() { diff --git a/CopperMod.Amiga.Tests/AmigaCopperConformanceMatrixTests.cs b/CopperMod.Amiga.Tests/AmigaCopperConformanceMatrixTests.cs index b2e11dc..3021c29 100644 --- a/CopperMod.Amiga.Tests/AmigaCopperConformanceMatrixTests.cs +++ b/CopperMod.Amiga.Tests/AmigaCopperConformanceMatrixTests.cs @@ -525,6 +525,32 @@ private static void CopperContendsWithBitplaneDmaSlots() Assert.Equal(AgnusChipSlotOwner.Sprite, bus.Agnus.CaptureSnapshot().LastDeniedFixedSlot?.Owner); } + [Fact] + public void LiveCopperFetchesInstructionWordsIntoInstructionLatchOrder() + { + var bus = new AmigaBus(captureBusAccesses: true, enableLiveAgnusDma: true); + WriteCopperList(bus, CopperList, (0x0180, 0x0F00), (0xFFFF, 0xFFFE)); + SetCopperPointer(bus, 1, CopperList); + bus.WriteWord(0x00DFF096, 0x8280); + + bus.AdvanceDmaTo(FrameCycles()); + + var copperDma = bus.BusAccesses + .Where(access => access.Request.Requester == AmigaBusRequester.Copper && + access.Request.Kind == AmigaBusAccessKind.Copper) + .ToArray(); + Assert.True(copperDma.Length >= 2); + Assert.False(copperDma[0].Request.IsWrite); + Assert.Equal(CopperList, copperDma[0].Request.Address); + Assert.False(copperDma[1].Request.IsWrite); + Assert.Equal(CopperList + 2, copperDma[1].Request.Address); + Assert.True(copperDma[0].CompletedCycle <= copperDma[1].RequestedCycle); + + Assert.True(copperDma.Length >= 4); + Assert.Equal(CopperList + 4, copperDma[2].Request.Address); + Assert.Equal(CopperList + 6, copperDma[3].Request.Address); + } + private static void StartLongBlit(AmigaBus bus) { bus.WriteWord(0x00DFF040, 0x09F0, 0); diff --git a/CopperMod.Amiga.Tests/PaulaTests.cs b/CopperMod.Amiga.Tests/PaulaTests.cs index 10d74e7..36e210f 100644 --- a/CopperMod.Amiga.Tests/PaulaTests.cs +++ b/CopperMod.Amiga.Tests/PaulaTests.cs @@ -23,6 +23,28 @@ public void ManualAudioDataOutputsHighThenLowByteAndRequestsInterrupt() Assert.True((bus.ReadWord(0x00DFF01E) & 0x0080) != 0); } + [Fact] + public void AudioOnlyRenderLeavesInterruptsForRegisterTimeline() + { + var bus = CreatePaulaComponentBus(); + bus.WriteWord(0x00DFF09A, 0xC080, 0); + bus.WriteWord(0x00DFF0AA, 0x7F81, 0); + var buffer = new float[4]; + + bus.Paula.RenderSample(0, buffer, 0, 2, advanceRegisterObservable: false); + bus.Paula.RenderSample(856, buffer, 1, 2, advanceRegisterObservable: false); + Assert.Empty(bus.Paula.DrainInterrupts()); + + bus.Paula.AdvanceRegisterObservableTo(856); + var interruptEvent = Assert.Single(bus.Paula.DrainInterrupts()); + + Assert.True(buffer[0] > 0.20f); + Assert.True(buffer[2] < -0.20f); + Assert.Equal(0, interruptEvent.Channel); + Assert.Equal(0x0080, interruptEvent.IntreqBit); + Assert.True((bus.Paula.Intreq & 0x0080) != 0); + } + [Fact] public void ManualAudioDataTransitionsOnExactIntegerPeriodCycles() { @@ -68,6 +90,80 @@ public void CustomRegisterReadDoesNotAdvanceMainPaulaAudioTimeline() Assert.True(buffer[2] < -0.20f); } + [Fact] + public void RegisterWakeCandidateIgnoresManualAudioSampleBoundary() + { + var bus = CreatePaulaComponentBus(); + SchedulePaulaWrite(bus, 0x0A6, 0x0003, 0); + SchedulePaulaWrite(bus, 0x0AA, 0x7F81, 0); + bus.Paula.AdvanceRegisterObservableTo(0); + var buffer = new float[4]; + + var candidate = bus.Paula.GetNextWakeCandidateCycle(0, 6); + bus.Paula.RenderSample(5, buffer, 0, 2); + bus.Paula.RenderSample(6, buffer, 1, 2); + + Assert.Null(candidate); + Assert.True(buffer[0] > 0.20f); + Assert.True(buffer[2] < -0.20f); + } + + [Fact] + public void RegisterWakeCandidateSkipsDmaLowByteSampleBoundary() + { + var bus = CreatePaulaComponentBus(); + bus.ChipRam[0x1000] = 0x7F; + bus.ChipRam[0x1001] = 0x81; + bus.ChipRam[0x1002] = 0x40; + bus.ChipRam[0x1003] = 0xC0; + SchedulePaulaWrite(bus, 0x0A2, 0x1000, 0); + SchedulePaulaWrite(bus, 0x0A4, 0x0002, 0); + SchedulePaulaWrite(bus, 0x0A6, 0x000A, 0); + SchedulePaulaWrite(bus, 0x096, 0x8201, 0); + + bus.Paula.AdvanceRegisterObservableTo(42); + var lowByteCandidate = bus.Paula.GetNextWakeCandidateCycle(42, 58); + var nextWordCandidate = bus.Paula.GetNextWakeCandidateCycle(42, 78); + + Assert.Null(lowByteCandidate); + Assert.Equal(78, nextWordCandidate); + } + + [Fact] + public void RegisterWakeCandidateKeepsAttachedSourceSampleBoundary() + { + var bus = CreatePaulaComponentBus(); + SchedulePaulaWrite(bus, 0x09E, 0x8010, 0); + SchedulePaulaWrite(bus, 0x0A6, 0x0002, 0); + SchedulePaulaWrite(bus, 0x0AA, 0x0005, 0); + bus.Paula.AdvanceRegisterObservableTo(0); + + var candidate = bus.Paula.GetNextWakeCandidateCycle(0, 4); + + Assert.Equal(4, candidate); + } + + [Fact] + public void RegisterDmaWordOutputKeepsLowByteBoundaryWhenAttachWriteIsPending() + { + var bus = CreatePaulaComponentBus(); + bus.ChipRam[0x1000] = 0x00; + bus.ChipRam[0x1001] = 0x05; + SchedulePaulaWrite(bus, 0x0A2, 0x1000, 0); + SchedulePaulaWrite(bus, 0x0A4, 0x0001, 0); + SchedulePaulaWrite(bus, 0x0A6, 0x000A, 0); + SchedulePaulaWrite(bus, 0x096, 0x8201, 0); + SchedulePaulaWrite(bus, 0x09E, 0x8010, 50); + + bus.Paula.AdvanceRegisterObservableTo(42); + var attachCandidate = bus.Paula.GetNextWakeCandidateCycle(42, 58); + bus.Paula.AdvanceRegisterObservableTo(50); + var lowByteCandidate = bus.Paula.GetNextWakeCandidateCycle(50, 58); + + Assert.Equal(50, attachCandidate); + Assert.Equal(58, lowByteCandidate); + } + [Fact] public void SerialDataReadReportsIdleTransmitEmptyWithoutReceiveBufferData() { @@ -113,6 +209,30 @@ public void CustomRegisterReadDmaFetchIsReusedWhenAudioTimelineCatchesUp() Assert.True(buffer[0] > 0.20f); } + [Fact] + public void RegisterTimelineDmaLatchIsConsumedByAudioTimelineWithoutSecondGrant() + { + var bus = CreatePaulaComponentBus(); + bus.ChipRam[0x1000] = 0x7F; + bus.ChipRam[0x1001] = 0x81; + SchedulePaulaWrite(bus, 0x0A2, 0x1000, 0); + SchedulePaulaWrite(bus, 0x0A4, 0x0001, 0); + SchedulePaulaWrite(bus, 0x0A6, 0x0064, 0); + SchedulePaulaWrite(bus, 0x096, 0x8201, 0); + + var pollCycle = 40L; + _ = bus.ReadWord(0x00DFF01E, ref pollCycle, AmigaBusAccessKind.CpuDataRead); + var grantsAfterPoll = CapturePaulaDmaGrants(bus); + var buffer = new float[2]; + + bus.Paula.RenderSample(38, buffer, 0, 2); + var grantsAfterAudio = CapturePaulaDmaGrants(bus); + + Assert.Equal(grantsAfterPoll, grantsAfterAudio); + Assert.Equal(grantsAfterPoll.Length, grantsAfterPoll.Distinct().Count()); + Assert.True(buffer[0] > 0.20f); + } + [Fact] public void RegisterReadAheadDoesNotChangeLaterManualAudioPlayback() { @@ -334,7 +454,7 @@ public void AudxperZeroIsLatchedRawAndUsesFullSixteenBitEffectivePeriod() } [Fact] - public void PartialPlaybackLiveDmaUsesConfiguredMinimumForAudioDmaRefillSlots() + public void PartialPlaybackLiveDmaUsesRawPeriodRequestsAndChannelSlotGrants() { var bus = new AmigaBus( enableLiveAgnusDma: true, @@ -352,11 +472,40 @@ public void PartialPlaybackLiveDmaUsesConfiguredMinimumForAudioDmaRefillSlots() .Where(access => access.Request.Kind == AmigaBusAccessKind.PaulaDma) .Select(access => access.RequestedCycle) .ToArray(); - Assert.Equal(new long[] { 0, 496, 992 }, requestedCycles); + Assert.Equal(new long[] { 0, 34, 488, 942 }, requestedCycles); + } + + [Fact] + public void DmaAudioBelowMinimumPeriodRepeatsLowByteUntilNextDmaWordArrives() + { + var probe = CreateMinimumPeriodDmaUnderrunBus(); + probe.Paula.AdvanceTo(1_000); + var dma = probe.BusAccesses + .Where(access => access.Request.Kind == AmigaBusAccessKind.PaulaDma) + .ToArray(); + var firstWordCycle = dma[1].CompletedCycle; + var secondWordCycle = dma[2].CompletedCycle; + var bus = CreateMinimumPeriodDmaUnderrunBus(); + var buffer = new float[8]; + + bus.Paula.RenderSample(firstWordCycle, buffer, 0, 2); + bus.Paula.RenderSample(firstWordCycle + 2, buffer, 1, 2); + bus.Paula.RenderSample(secondWordCycle - 2, buffer, 2, 2); + bus.Paula.RenderSample(secondWordCycle, buffer, 3, 2); + var lateBus = CreateMinimumPeriodDmaUnderrunBus(); + var lateBuffer = new float[2]; + lateBus.Paula.RenderSample(secondWordCycle - 2, lateBuffer, 0, 2); + var details = $"dma={string.Join(",", dma.Select(access => $"{access.RequestedCycle}->{access.CompletedCycle}"))}; samples={string.Join(",", buffer)}"; + + Assert.True(buffer[0] > 0.20f, details); + Assert.True(buffer[2] < -0.20f, details); + Assert.True(buffer[4] < -0.20f, details); + Assert.True(buffer[6] > 0.10f, details); + Assert.True(lateBuffer[0] < -0.20f, $"{details}; late={string.Join(",", lateBuffer)}"); } [Fact] - public void FullLiveDmaUsesConfiguredMinimumForAudioDmaRefillSlots() + public void FullLiveDmaUsesRawPeriodRequestsAndChannelSlotGrants() { var bus = new AmigaBus( enableLiveAgnusDma: true, @@ -374,7 +523,7 @@ public void FullLiveDmaUsesConfiguredMinimumForAudioDmaRefillSlots() .Where(access => access.Request.Kind == AmigaBusAccessKind.PaulaDma) .Select(access => access.RequestedCycle) .ToArray(); - Assert.Equal(new long[] { 0, 496, 992 }, requestedCycles); + Assert.Equal(new long[] { 0, 34, 488, 942 }, requestedCycles); } [Fact] @@ -470,6 +619,12 @@ public void AdkconPeriodAttachModulatesNextChannelPeriod() private static int CountPaulaDmaReads(AmigaBus bus) => bus.BusAccesses.Count(access => access.Request.Kind == AmigaBusAccessKind.PaulaDma); + private static (uint Address, long RequestedCycle, long GrantedCycle)[] CapturePaulaDmaGrants(AmigaBus bus) + => bus.BusAccesses + .Where(access => access.Request.Kind == AmigaBusAccessKind.PaulaDma) + .Select(access => (access.Request.Address, access.RequestedCycle, access.GrantedCycle)) + .ToArray(); + private static void ScheduleManualRewriteSequence(AmigaBus bus) { SchedulePaulaWrite(bus, 0x0A6, 0x0002, 0); @@ -501,4 +656,22 @@ private static AmigaBus CreateDefaultMinimumPaulaBus() return new AmigaBus( enableLiveAgnusDma: false); } + + private static AmigaBus CreateMinimumPeriodDmaUnderrunBus() + { + var bus = new AmigaBus( + enableLiveAgnusDma: true, + enableLiveDisplayDma: false); + bus.ChipRam[0x1000] = 0x00; + bus.ChipRam[0x1001] = 0x00; + bus.ChipRam[0x1002] = 0x7F; + bus.ChipRam[0x1003] = 0x81; + bus.ChipRam[0x1004] = 0x40; + bus.ChipRam[0x1005] = 0xC0; + SchedulePaulaWrite(bus, 0x0A2, 0x1000, 0); + SchedulePaulaWrite(bus, 0x0A4, 0x0003, 0); + SchedulePaulaWrite(bus, 0x0A6, 0x0001, 0); + SchedulePaulaWrite(bus, 0x096, 0x8201, 0); + return bus; + } } diff --git a/CopperMod.Amiga/Blitter.cs b/CopperMod.Amiga/Blitter.cs index e236fe0..cefcd5b 100644 --- a/CopperMod.Amiga/Blitter.cs +++ b/CopperMod.Amiga/Blitter.cs @@ -532,6 +532,10 @@ internal sealed class AmigaBlitter private ushort _deferredRestartBltsize; private BlitterCompiledKernel _activeKernel; private BlitterAreaKernelState _areaKernelState; + private BlitterDmaReadLatch _sourceALatch; + private BlitterDmaReadLatch _sourceBLatch; + private BlitterDmaReadLatch _sourceCLatch; + private BlitterDmaWriteLatch _destinationDLatch; public AmigaBlitter(AmigaBus bus, bool enableSpecialization = false) { @@ -632,6 +636,10 @@ public void Reset() _deferredRestartBltsize = 0; _activeKernel = default; _areaKernelState = default; + _sourceALatch = default; + _sourceBLatch = default; + _sourceCLatch = default; + _destinationDLatch = default; } public AmigaBlitterSnapshot CaptureSnapshot() @@ -1051,51 +1059,35 @@ private void StepAreaWord(long targetCycle) var rawA = _activeDataA; if (_useA) { - var read = ReadAndStep(ref _workSourceA, _step, nextReadCycle); - rawA = read.Value; - RecordBlitterDma(read.BusAccess); - nextReadCycle = read.BusAccess.CompletedCycle; - nextCycle = Math.Max(nextCycle, read.BusAccess.CompletedCycle); + _sourceALatch = LoadSourceDmaLatch(BlitterDmaSource.A, ref _workSourceA, _step, nextReadCycle); + var access = _sourceALatch.BusAccess; + rawA = ConsumeSourceDmaLatch(ref _sourceALatch); + nextReadCycle = access.CompletedCycle; + nextCycle = Math.Max(nextCycle, access.CompletedCycle); } var rawB = _activeDataB; if (_useB) { - var read = ReadAndStep(ref _workSourceB, _step, nextReadCycle); - rawB = read.Value; - RecordBlitterDma(read.BusAccess); - nextReadCycle = read.BusAccess.CompletedCycle; - nextCycle = Math.Max(nextCycle, read.BusAccess.CompletedCycle); + _sourceBLatch = LoadSourceDmaLatch(BlitterDmaSource.B, ref _workSourceB, _step, nextReadCycle); + var access = _sourceBLatch.BusAccess; + rawB = ConsumeSourceDmaLatch(ref _sourceBLatch); + nextReadCycle = access.CompletedCycle; + nextCycle = Math.Max(nextCycle, access.CompletedCycle); } var rawC = _activeDataC; if (_useC) { - var read = ReadAndStep(ref _workSourceC, _step, nextReadCycle); - rawC = read.Value; + _sourceCLatch = LoadSourceDmaLatch(BlitterDmaSource.C, ref _workSourceC, _step, nextReadCycle); + var access = _sourceCLatch.BusAccess; + rawC = ConsumeSourceDmaLatch(ref _sourceCLatch); _activeDataC = rawC; - RecordBlitterDma(read.BusAccess); - nextReadCycle = read.BusAccess.CompletedCycle; - nextCycle = Math.Max(nextCycle, read.BusAccess.CompletedCycle); + nextReadCycle = access.CompletedCycle; + nextCycle = Math.Max(nextCycle, access.CompletedCycle); } - ushort output; - if (_specializationEnabled && _activeKernel.SupportsArea) - { - output = _activeKernel.ExecuteArea(ref _areaKernelState, rawA, rawB, rawC, (ushort)mask); - _previousA = _areaKernelState.PreviousA; - _previousB = _areaKernelState.PreviousB; - _fillCarry = _areaKernelState.FillCarry; - } - else - { - if (_specializationEnabled) - { - _kernelCache.RecordFallback(); - } - - output = ExecuteAreaScalar(rawA, rawB, rawC, (ushort)mask); - } + var output = ExecuteAreaFromSourceLatches(rawA, rawB, rawC, (ushort)mask); if (output != 0) { @@ -1106,8 +1098,8 @@ private void StepAreaWord(long targetCycle) if (_useD) { var writeCycle = Math.Max(nextReadCycle, stepEnd - ChipSlotCycles); - var write = WriteAndStep(ref _workDestinationD, _step, output, writeCycle); - RecordBlitterDma(write); + _destinationDLatch = CreateDestinationDmaLatch(output); + var write = CommitDestinationDmaLatch(ref _workDestinationD, _step, ref _destinationDLatch, writeCycle); nextCycle = Math.Max(nextCycle, write.CompletedCycle); } @@ -1167,33 +1159,27 @@ private void StepLinePixel(long targetCycle) var nextReadCycle = _useB ? stepStart : stepStart + ChipSlotCycles; if (_useB) { - var firstB = ReadLineBPattern(nextReadCycle); - nextReadCycle = firstB.BusAccess.CompletedCycle; - nextCycle = Math.Max(nextCycle, firstB.BusAccess.CompletedCycle); - var secondB = ReadLineBPattern(nextReadCycle); - _dataB = secondB.Value; - nextReadCycle = secondB.BusAccess.CompletedCycle; - nextCycle = Math.Max(nextCycle, secondB.BusAccess.CompletedCycle); + _sourceBLatch = LoadLineBPatternLatch(nextReadCycle); + var firstBAccess = _sourceBLatch.BusAccess; + _ = ConsumeSourceDmaLatch(ref _sourceBLatch); + nextReadCycle = firstBAccess.CompletedCycle; + nextCycle = Math.Max(nextCycle, firstBAccess.CompletedCycle); + _sourceBLatch = LoadLineBPatternLatch(nextReadCycle); + var secondBAccess = _sourceBLatch.BusAccess; + _dataB = ConsumeSourceDmaLatch(ref _sourceBLatch); + nextReadCycle = secondBAccess.CompletedCycle; + nextCycle = Math.Max(nextCycle, secondBAccess.CompletedCycle); _workSourceB = _bus.AddChipDmaPointerOffset(_workSourceB, _lineBPatternStride); } - var read = _bus.ReadChipWordForDeviceWithResult( - AmigaBusRequester.Blitter, - AmigaBusAccessKind.Blitter, - _workSourceC, - nextReadCycle); - RecordBlitterDma(read.BusAccess); - nextCycle = Math.Max(nextCycle, read.BusAccess.CompletedCycle); + _sourceCLatch = LoadSourceDmaLatch(BlitterDmaSource.C, _workSourceC, nextReadCycle); + var sourceCAccess = _sourceCLatch.BusAccess; + var sourceC = ConsumeSourceDmaLatch(ref _sourceCLatch); + nextCycle = Math.Max(nextCycle, sourceCAccess.CompletedCycle); var lineMask = RotateRight(_dataA, _lineBit); var textureBit = (_dataB & (0x8000 >> ((_shiftB + _lineIndex) & 0x0F))) != 0; var texture = textureBit ? (ushort)0xFFFF : (ushort)0; - var output = _specializationEnabled && _activeKernel.SupportsLine - ? _activeKernel.ExecuteLine(lineMask, texture, read.Value) - : ApplyMinterm(_minterm, lineMask, texture, read.Value); - if (_specializationEnabled && !_activeKernel.SupportsLine) - { - _kernelCache.RecordFallback(); - } + var output = ExecuteLineFromSourceLatches(lineMask, texture, sourceC); if (output != 0) { @@ -1201,13 +1187,11 @@ private void StepLinePixel(long targetCycle) } var destination = _lineIndex == 0 ? _workDestinationD : _workSourceC; - var write = _bus.WriteChipWordForDeviceWithResult( - AmigaBusRequester.Blitter, - AmigaBusAccessKind.Blitter, + _destinationDLatch = CreateDestinationDmaLatch(output); + var write = CommitDestinationDmaLatch( destination, - output, - Math.Max(read.BusAccess.CompletedCycle, stepEnd - ChipSlotCycles)); - RecordBlitterDma(write); + ref _destinationDLatch, + Math.Max(sourceCAccess.CompletedCycle, stepEnd - ChipSlotCycles)); nextCycle = Math.Max(nextCycle, write.CompletedCycle); _lineLastDrawnY = _lineY; } @@ -1224,17 +1208,6 @@ private void StepLinePixel(long targetCycle) StepLineAddress(); } - private AmigaDeviceWordReadResult ReadLineBPattern(long cycle) - { - var read = _bus.ReadChipWordForDeviceWithResult( - AmigaBusRequester.Blitter, - AmigaBusAccessKind.Blitter, - _workSourceB, - cycle); - RecordBlitterDma(read.BusAccess); - return read; - } - private void StepLineAddress() { if (_lineSign) @@ -1472,6 +1445,40 @@ private void RecordBlitterDma(AmigaBusAccessResult access) _completedMicroOps++; } + private ushort ExecuteAreaFromSourceLatches(ushort rawA, ushort rawB, ushort rawC, ushort mask) + { + if (_specializationEnabled && _activeKernel.SupportsArea) + { + var output = _activeKernel.ExecuteArea(ref _areaKernelState, rawA, rawB, rawC, mask); + _previousA = _areaKernelState.PreviousA; + _previousB = _areaKernelState.PreviousB; + _fillCarry = _areaKernelState.FillCarry; + return output; + } + + if (_specializationEnabled) + { + _kernelCache.RecordFallback(); + } + + return ExecuteAreaScalar(rawA, rawB, rawC, mask); + } + + private ushort ExecuteLineFromSourceLatches(ushort lineMask, ushort texture, ushort sourceC) + { + if (_specializationEnabled && _activeKernel.SupportsLine) + { + return _activeKernel.ExecuteLine(lineMask, texture, sourceC); + } + + if (_specializationEnabled) + { + _kernelCache.RecordFallback(); + } + + return ApplyMinterm(_minterm, lineMask, texture, sourceC); + } + private ushort ExecuteAreaScalar(ushort rawA, ushort rawB, ushort rawC, ushort mask) { rawA = (ushort)(rawA & mask); @@ -1504,26 +1511,54 @@ private ushort ApplyFill(ushort value) return output; } - private AmigaDeviceWordReadResult ReadAndStep(ref uint pointer, int step, long cycle) + private BlitterDmaReadLatch LoadSourceDmaLatch(BlitterDmaSource source, ref uint pointer, int step, long cycle) + { + var latch = LoadSourceDmaLatch(source, GetEffectiveBlitterAddress(pointer), cycle); + pointer = _bus.AddChipDmaPointerOffset(pointer, step); + return latch; + } + + private BlitterDmaReadLatch LoadLineBPatternLatch(long cycle) + => LoadSourceDmaLatch(BlitterDmaSource.B, _workSourceB, cycle); + + private BlitterDmaReadLatch LoadSourceDmaLatch(BlitterDmaSource source, uint address, long cycle) { var value = _bus.ReadChipWordForDeviceWithResult( AmigaBusRequester.Blitter, AmigaBusAccessKind.Blitter, - GetEffectiveBlitterAddress(pointer), + GetEffectiveBlitterAddress(address), cycle); - pointer = _bus.AddChipDmaPointerOffset(pointer, step); + RecordBlitterDma(value.BusAccess); + return new BlitterDmaReadLatch(source, value.Value, value.BusAccess); + } + + private static ushort ConsumeSourceDmaLatch(ref BlitterDmaReadLatch latch) + { + var value = latch.Value; + latch = default; return value; } - private AmigaBusAccessResult WriteAndStep(ref uint pointer, int step, ushort value, long cycle) + private static BlitterDmaWriteLatch CreateDestinationDmaLatch(ushort value) + => new BlitterDmaWriteLatch(value); + + private AmigaBusAccessResult CommitDestinationDmaLatch(ref uint pointer, int step, ref BlitterDmaWriteLatch latch, long cycle) + { + var access = CommitDestinationDmaLatch(GetEffectiveBlitterAddress(pointer), ref latch, cycle); + pointer = _bus.AddChipDmaPointerOffset(pointer, step); + return access; + } + + private AmigaBusAccessResult CommitDestinationDmaLatch(uint address, ref BlitterDmaWriteLatch latch, long cycle) { var access = _bus.WriteChipWordForDeviceWithResult( AmigaBusRequester.Blitter, AmigaBusAccessKind.Blitter, - GetEffectiveBlitterAddress(pointer), - value, + GetEffectiveBlitterAddress(address), + latch.Value, cycle); - pointer = _bus.AddChipDmaPointerOffset(pointer, step); + RecordBlitterDma(access); + latch = default; return access; } @@ -1552,6 +1587,39 @@ private static ushort ShiftSource(ushort current, ref ushort previous, int shift private static ushort ApplyMinterm(byte minterm, ushort sourceA, ushort sourceB, ushort sourceC) => BlitterKernelMath.ApplyMinterm(minterm, sourceA, sourceB, sourceC); + private enum BlitterDmaSource + { + A, + B, + C + } + + private readonly struct BlitterDmaReadLatch + { + public BlitterDmaReadLatch(BlitterDmaSource source, ushort value, AmigaBusAccessResult busAccess) + { + Source = source; + Value = value; + BusAccess = busAccess; + } + + public BlitterDmaSource Source { get; } + + public ushort Value { get; } + + public AmigaBusAccessResult BusAccess { get; } + } + + private readonly struct BlitterDmaWriteLatch + { + public BlitterDmaWriteLatch(ushort value) + { + Value = value; + } + + public ushort Value { get; } + } + private readonly struct DeferredRegisterWrite { public DeferredRegisterWrite(ushort offset, ushort value) diff --git a/CopperMod.Amiga/OcsDisplay.cs b/CopperMod.Amiga/OcsDisplay.cs index 6c29dd9..e132c42 100644 --- a/CopperMod.Amiga/OcsDisplay.cs +++ b/CopperMod.Amiga/OcsDisplay.cs @@ -1701,6 +1701,7 @@ private void InvalidateLiveDisplayEventCycle() { _liveNextDisplayEventValid = false; _liveNextDisplayEventCycle = long.MaxValue; + _bus.InvalidateLiveDisplayHrmGrantCache(); InvalidateLiveCopperWaitCycle(); InvalidateLiveWorkCycle(); } @@ -3167,88 +3168,87 @@ private void StepLiveCopper(long targetCycle) return; } - var fetchCycle = Math.Min(_liveCopper.Cycle, targetCycle); - var first = _bus.ReadLiveCopperDmaWord(_liveCopper.Pc, fetchCycle, out var firstAccess); - fetchCycle = firstAccess.CompletedCycle; - var dataRequestCycle = Math.Max( - fetchCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperInstructionDataHpUnits)); - var secondAddress = AddDmaPointerOffset(_liveCopper.Pc, 2); - var second = _bus.ReadLiveCopperDmaWord(secondAddress, dataRequestCycle, out var secondAccess); - var dataCycle = secondAccess.GrantedCycle; - var instructionStopCycle = Math.Max( - secondAccess.CompletedCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperMoveHpUnits)); - var controlInstructionStopCycle = Math.Max( - secondAccess.CompletedCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperSkipHpUnits)); + var instruction = LoadLiveCopperInstruction(_liveCopper.Pc, Math.Min(_liveCopper.Cycle, targetCycle)); _liveCopper.Pc = AddDmaPointerOffset(_liveCopper.Pc, 4); - if (first == 0xFFFF && second == 0xFFFE) + if (instruction.IsEnd) { _liveCopper.Stopped = true; - _liveCopper.Cycle = instructionStopCycle; + _liveCopper.Cycle = instruction.MoveStopCycle; return; } - if ((first & 1) == 0) + if (instruction.IsMove) { - var register = (ushort)(first & 0x01FE); + var register = instruction.MoveRegister; var suppressMove = _liveCopper.SuppressNextMove; _liveCopper.SuppressNextMove = false; - if (dataCycle > targetCycle) + if (instruction.DataCycle > targetCycle) { _liveCopper.PendingMove = true; _liveCopper.PendingMoveRegister = register; - _liveCopper.PendingMoveValue = second; - _liveCopper.PendingMoveCycle = dataCycle; - _liveCopper.PendingMoveStopCycle = instructionStopCycle; + _liveCopper.PendingMoveValue = instruction.Second; + _liveCopper.PendingMoveCycle = instruction.DataCycle; + _liveCopper.PendingMoveStopCycle = instruction.MoveStopCycle; _liveCopper.PendingMoveSuppress = suppressMove; - _liveCopper.Cycle = dataCycle; + _liveCopper.Cycle = instruction.DataCycle; InvalidateLiveDisplayEventCycle(); return; } - if (dataCycle <= targetCycle) + if (instruction.DataCycle <= targetCycle) { - ApplyLiveCopperMove(register, second, dataCycle, instructionStopCycle, suppressMove); + ApplyLiveCopperMove(register, instruction.Second, instruction.DataCycle, instruction.MoveStopCycle, suppressMove); } - _liveCopper.Cycle = instructionStopCycle; + _liveCopper.Cycle = instruction.MoveStopCycle; return; } - if ((second & 1) == 0) + if (instruction.IsWait) { - _liveCopper.Cycle = controlInstructionStopCycle; - _liveCopper.Wait(first, second); + _liveCopper.Cycle = instruction.ControlStopCycle; + _liveCopper.Wait(instruction.First, instruction.Second); return; } - if (controlInstructionStopCycle > targetCycle) + if (instruction.ControlStopCycle > targetCycle) { _liveCopper.PendingSkip = true; - _liveCopper.PendingSkipFirst = first; - _liveCopper.PendingSkipSecond = second; - _liveCopper.PendingSkipCycle = controlInstructionStopCycle; - _liveCopper.Cycle = controlInstructionStopCycle; + _liveCopper.PendingSkipFirst = instruction.First; + _liveCopper.PendingSkipSecond = instruction.Second; + _liveCopper.PendingSkipCycle = instruction.ControlStopCycle; + _liveCopper.Cycle = instruction.ControlStopCycle; InvalidateLiveDisplayEventCycle(); return; } if (IsCopperComparisonSatisfied( - first, - second, + instruction.First, + instruction.Second, _liveFrameStartCycle, - controlInstructionStopCycle, - IsCopperBlitterFinishedForWait(second))) + instruction.ControlStopCycle, + IsCopperBlitterFinishedForWait(instruction.Second))) { _liveCopper.SuppressNextMove = true; } - _liveCopper.Cycle = controlInstructionStopCycle; + _liveCopper.Cycle = instruction.ControlStopCycle; + } + + private CopperInstructionLatch LoadLiveCopperInstruction(uint pc, long fetchCycle) + { + var first = _bus.ReadLiveCopperDmaWord(pc, fetchCycle, out var firstAccess); + var secondRequestCycle = GetCopperSecondWordRequestCycle(firstAccess); + var second = _bus.ReadLiveCopperDmaWord(AddDmaPointerOffset(pc, 2), secondRequestCycle, out var secondAccess); + return new CopperInstructionLatch(first, firstAccess, second, secondAccess); } + private static long GetCopperSecondWordRequestCycle(AmigaBusAccessResult firstAccess) + => Math.Max( + firstAccess.CompletedCycle, + firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperInstructionDataHpUnits)); + private void CompletePendingLiveCopperSkip(long targetCycle) { if (!_liveCopper.PendingSkip || _liveCopper.PendingSkipCycle > targetCycle) @@ -4884,6 +4884,8 @@ private bool TryRenderTimelineLowResLineFastPath( } DisplayTimelineState? firstState = null; + var lineXStart = AmigaConstants.PalLowResWidth; + var lineXStop = 0; for (var segmentIndex = 0; segmentIndex < line.SegmentCount; segmentIndex++) { var segment = line.Segments[segmentIndex]; @@ -4900,6 +4902,30 @@ private bool TryRenderTimelineLowResLineFastPath( } firstState ??= state; + lineXStart = Math.Min(lineXStart, segment.XStart); + lineXStop = Math.Max(lineXStop, segment.XStop); + } + + if (firstState is null) + { + return true; + } + + var indexState = firstState; + ApplyTimelineStateForRendering(indexState); + _displayWindowVerticallyOpen = indexState.DisplayWindowVerticallyOpen; + _displayWindowStateLine = StandardVStart + row + 1; + _currentRenderRow = row; + if (!TryPrepareTimelineLowResFastBitplanes( + row, + lineXStart, + lineXStop, + indexState, + timeline, + out var dataFirstX, + out var dataLastX)) + { + return false; } for (var segmentIndex = 0; segmentIndex < line.SegmentCount; segmentIndex++) @@ -4917,10 +4943,7 @@ private bool TryRenderTimelineLowResLineFastPath( _currentRenderRow = row; CapturePaletteFrameSpans(row, row + 1, segment.XStart, segment.XStop); FillRows(bgra, row, row + 1, segment.XStart, segment.XStop); - if (!TryRenderTimelineLowResFastBitplanes(bgra, row, segment, state, timeline)) - { - return false; - } + WritePreparedTimelineLowResFastBitplanes(bgra, row, segment.XStart, segment.XStop, dataFirstX, dataLastX); } return true; @@ -4964,22 +4987,33 @@ private static bool HasSameBitplaneDataRegisters(DisplayTimelineState left, Disp private bool IsTimelineLowResLineFastPathSupported(int row, DisplayLineSegment segment, DisplayTimelineState state) { if ((state.Bplcon0 & 0x8804) != 0 || - (state.Bplcon1 & 0x00FF) != 0 || HasBitplaneDataSpanInBand(row, row + 1, segment.XStart, segment.XStop)) { return false; } + var dualPlayfield = (state.Bplcon0 & 0x0400) != 0; + var planeCount = Math.Clamp(state.DecodePlaneCount, 0, LiveBitplanePlaneCount); + if ((state.Bplcon1 & 0x00FF) != 0 && + (dualPlayfield || !TryGetUniformNormalPlayfieldScroll(state, planeCount, out _))) + { + return false; + } + return true; } - private bool TryRenderTimelineLowResFastBitplanes( - Span bgra, + private bool TryPrepareTimelineLowResFastBitplanes( int row, - DisplayLineSegment segment, + int xStart, + int xStop, DisplayTimelineState state, - DisplayFrameTimeline timeline) + DisplayFrameTimeline timeline, + out int dataFirstX, + out int dataLastX) { + dataFirstX = 0; + dataLastX = 0; if (state.PlaneCount <= 0 || !IsBitplaneDmaEnabled(state.Dmacon)) { return true; @@ -5011,8 +5045,8 @@ private bool TryRenderTimelineLowResFastBitplanes( } var originX = GetDataFetchStartX(window); - var clipLeft = Math.Max(Math.Max(0, window.X), segment.XStart); - var clipRight = Math.Min(Math.Min(AmigaConstants.PalLowResWidth, window.X + window.Width), segment.XStop); + var clipLeft = Math.Max(Math.Max(0, window.X), xStart); + var clipRight = Math.Min(Math.Min(AmigaConstants.PalLowResWidth, window.X + window.Width), xStop); if (clipRight <= clipLeft) { return true; @@ -5022,16 +5056,25 @@ private bool TryRenderTimelineLowResFastBitplanes( Array.Clear(_timelineFastPathPriorityMasks, clipLeft, clipRight - clipLeft); var dualPlayfield = (state.Bplcon0 & 0x0400) != 0; + var normalPlayfieldScroll = 0; + if (!dualPlayfield) + { + _ = TryGetUniformNormalPlayfieldScroll(state, planeCount, out normalPlayfieldScroll); + } + var fetchPixels = fetchWords * PlanarChunkPixels; - var firstX = Math.Max(clipLeft, originX); - var lastX = Math.Min(clipRight, originX + fetchPixels); + var dataOriginX = originX + normalPlayfieldScroll; + var firstX = Math.Max(clipLeft, dataOriginX); + var lastX = Math.Min(clipRight, dataOriginX + fetchPixels); if (lastX <= firstX) { return true; } - var firstWord = Math.Clamp((firstX - originX) >> 4, 0, fetchWords - 1); - var lastWord = Math.Clamp((lastX - 1 - originX) >> 4, 0, fetchWords - 1); + dataFirstX = firstX; + dataLastX = lastX; + var firstWord = Math.Clamp((firstX - dataOriginX) >> 4, 0, fetchWords - 1); + var lastWord = Math.Clamp((lastX - 1 - dataOriginX) >> 4, 0, fetchWords - 1); for (var word = firstWord; word <= lastWord; word++) { if (!TryGetTimelineDecodedChunk(row, word, state, planeCount, dualPlayfield, timeline, out var chunk)) @@ -5039,10 +5082,10 @@ private bool TryRenderTimelineLowResFastBitplanes( return false; } - var wordStart = originX + (word * PlanarChunkPixels); - var xStart = Math.Max(firstX, wordStart); - var xStop = Math.Min(lastX, wordStart + PlanarChunkPixels); - for (var x = xStart; x < xStop; x++) + var wordStart = dataOriginX + (word * PlanarChunkPixels); + var chunkXStart = Math.Max(firstX, wordStart); + var chunkXStop = Math.Min(lastX, wordStart + PlanarChunkPixels); + for (var x = chunkXStart; x < chunkXStop; x++) { var offset = x - wordStart; _timelineFastPathColorIndexes[x] = chunk.GetColorIndex(offset); @@ -5059,13 +5102,27 @@ private bool TryRenderTimelineLowResFastBitplanes( { RecordBitplanePixel(colorIndex, priorityMask, x, row); } - - WriteLowResolutionOutputPixel(bgra, x, row, _convertedColors[colorIndex]); } return true; } + private void WritePreparedTimelineLowResFastBitplanes( + Span bgra, + int row, + int segmentXStart, + int segmentXStop, + int dataFirstX, + int dataLastX) + { + var xStart = Math.Max(segmentXStart, dataFirstX); + var xStop = Math.Min(segmentXStop, dataLastX); + for (var x = xStart; x < xStop; x++) + { + WriteLowResolutionOutputPixel(bgra, x, row, _convertedColors[_timelineFastPathColorIndexes[x]]); + } + } + private bool TryRenderTimelineCachedBitplanes( Span bgra, int row, @@ -5109,49 +5166,37 @@ private bool TryRenderTimelineCachedBitplanes( } var zeroScroll = (state.Bplcon1 & 0x00FF) == 0; + var normalPlayfieldScroll = 0; + var uniformNormalScroll = !dualPlayfield && TryGetUniformNormalPlayfieldScroll(state, planeCount, out normalPlayfieldScroll); + var useChunkedScroll = zeroScroll || uniformNormalScroll; var renderHighWidth = IsRenderingHighResolutionWidth(); var renderHighHeight = IsRenderingHighResolutionHeight(); var renderInterlace = (state.Bplcon0 & 0x0004) != 0; var lastX = Math.Min(clipRight, originX + drawPixels + (highResolution ? 8 : 16)); - for (var x = Math.Max(clipLeft, originX); x < lastX; x++) + if (highResolution) { - var relativeX = x - originX; - if (relativeX < -15 || relativeX >= drawPixels + (highResolution ? 8 : 16)) + for (var x = Math.Max(clipLeft, originX); x < lastX; x++) { - continue; - } - - int colorIndex; - byte priorityMask; - if (highResolution) - { - if (!TryGetTimelineBitplaneColorIndex( - row, - x, - originX, - fetchPixels, - fetchWords, - state, - planeCount, - timeline, - out var leftColorIndex, - hiresSubPixel: 0) || - !TryGetTimelineBitplaneColorIndex( - row, - x, - originX, - fetchPixels, - fetchWords, - state, - planeCount, - timeline, - out var rightColorIndex, - hiresSubPixel: 1)) + var relativeSubPixel = (x - originX) * 2; + var leftColorIndex = 0; + var rightColorIndex = 0; + if ((uint)relativeSubPixel < (uint)fetchPixels) { - return false; + var word = relativeSubPixel >> 4; + if ((uint)word < (uint)fetchWords) + { + if (!TryGetTimelineDecodedChunk(row, word, state, planeCount, dualPlayfield: false, timeline, out var chunk)) + { + return false; + } + + var offset = relativeSubPixel & 0x0F; + leftColorIndex = chunk.GetColorIndex(offset); + rightColorIndex = chunk.GetColorIndex(offset + 1); + } } - priorityMask = (leftColorIndex | rightColorIndex) == 0 ? (byte)0 : NormalPlayfieldPriorityMask; + var priorityMask = (leftColorIndex | rightColorIndex) == 0 ? (byte)0 : NormalPlayfieldPriorityMask; SetPlayfieldPriorityMask(x, row, priorityMask); if ((leftColorIndex | rightColorIndex) != 0) { @@ -5187,18 +5232,30 @@ private bool TryRenderTimelineCachedBitplanes( renderInterlace, _renderInterlaceField); } + } + + return true; + } + for (var x = Math.Max(clipLeft, originX); x < lastX; x++) + { + var relativeX = x - originX; + if (relativeX < -15 || relativeX >= drawPixels + 16) + { continue; } - if (zeroScroll) + int colorIndex; + byte priorityMask; + if (useChunkedScroll) { - if ((uint)relativeX >= (uint)fetchPixels) + var scrolledRelativeX = uniformNormalScroll ? relativeX - normalPlayfieldScroll : relativeX; + if ((uint)scrolledRelativeX >= (uint)fetchPixels) { continue; } - var word = relativeX >> 4; + var word = scrolledRelativeX >> 4; if ((uint)word >= (uint)fetchWords) { continue; @@ -5209,7 +5266,7 @@ private bool TryRenderTimelineCachedBitplanes( return false; } - var offset = relativeX & 0x0F; + var offset = scrolledRelativeX & 0x0F; colorIndex = chunk.GetColorIndex(offset); priorityMask = chunk.GetPriorityMask(offset); } @@ -5246,6 +5303,33 @@ private bool TryRenderTimelineCachedBitplanes( return true; } + private static bool TryGetUniformNormalPlayfieldScroll(DisplayTimelineState state, int planeCount, out int scroll) + { + var evenScroll = state.Bplcon1 & 0x0F; + var oddScroll = (state.Bplcon1 >> 4) & 0x0F; + var hasEvenPlane = false; + var hasOddPlane = false; + for (var plane = 0; plane < planeCount; plane++) + { + if ((state.PlaneHasRowMask & (1 << plane)) == 0) + { + continue; + } + + if ((plane & 1) == 0) + { + hasEvenPlane = true; + } + else + { + hasOddPlane = true; + } + } + + scroll = hasEvenPlane ? evenScroll : oddScroll; + return !hasEvenPlane || !hasOddPlane || evenScroll == oddScroll; + } + private bool TryGetTimelineBitplaneColorIndex( int row, int x, @@ -5995,92 +6079,87 @@ private void StepCopperInstruction( ref int renderCursorPixelDelay, ref CopperPresentationState copper) { - var fetchCycle = Math.Min(copper.Cycle, frameStopCycle); - var first = ReadCopperWordForPresentation(copper.Pc, fetchCycle, out var firstAccess); - fetchCycle = firstAccess.CompletedCycle; - var dataRequestCycle = Math.Max( - fetchCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperInstructionDataHpUnits)); - var second = ReadCopperWordForPresentation(AddDmaPointerOffset(copper.Pc, 2), dataRequestCycle, out var secondAccess); - var dataCycle = secondAccess.GrantedCycle; - var instructionStopCycle = Math.Max( - secondAccess.CompletedCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperMoveHpUnits)); - var controlInstructionStopCycle = Math.Max( - secondAccess.CompletedCycle, - firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperSkipHpUnits)); + var instruction = LoadPresentationCopperInstruction(copper.Pc, Math.Min(copper.Cycle, frameStopCycle)); copper.Pc = AddDmaPointerOffset(copper.Pc, 4); - if (first == 0xFFFF && second == 0xFFFE) + if (instruction.IsEnd) { copper.Stopped = true; return; } - if ((first & 1) == 0) + if (instruction.IsMove) { - var register = (ushort)(first & 0x01FE); + var register = instruction.MoveRegister; var writePixelDelay = GetCopperWritePixelDelay(register); - var clippedWritePixelDelay = dataCycle <= frameStopCycle ? writePixelDelay : 0; + var clippedWritePixelDelay = instruction.DataCycle <= frameStopCycle ? writePixelDelay : 0; RenderPresentationSpan( bgra, frameStartCycle, renderCursorCycle, - Math.Min(dataCycle, frameStopCycle), + Math.Min(instruction.DataCycle, frameStopCycle), useTimedWrites, renderCursorPixelDelay, clippedWritePixelDelay); - renderCursorCycle = Math.Max(renderCursorCycle, Math.Min(dataCycle, frameStopCycle)); + renderCursorCycle = Math.Max(renderCursorCycle, Math.Min(instruction.DataCycle, frameStopCycle)); renderCursorPixelDelay = clippedWritePixelDelay; - if (dataCycle <= frameStopCycle) + if (instruction.DataCycle <= frameStopCycle) { var suppressMove = copper.SuppressNextMove; copper.SuppressNextMove = false; if (IsCopperDangerStopRegister(register)) { copper.Stopped = true; - copper.Cycle = instructionStopCycle; + copper.Cycle = instruction.MoveStopCycle; return; } if (!suppressMove && CanCopperWriteRegister(register)) { - _currentCopperRow = GetOutputRowForCycle(frameStartCycle, dataCycle); - ApplyCopperMove(register, second, dataCycle, applyHardwareSideEffects: false); + _currentCopperRow = GetOutputRowForCycle(frameStartCycle, instruction.DataCycle); + ApplyCopperMove(register, instruction.Second, instruction.DataCycle, applyHardwareSideEffects: false); if (register == 0x088) { - copper.JumpTo(_copperListPointer, dataCycle); + copper.JumpTo(_copperListPointer, instruction.DataCycle); } else if (register == 0x08A) { - copper.JumpTo(_copperListPointer2, dataCycle); + copper.JumpTo(_copperListPointer2, instruction.DataCycle); } } } - copper.Cycle = instructionStopCycle; + copper.Cycle = instruction.MoveStopCycle; return; } - if ((second & 1) == 0) + if (instruction.IsWait) { - copper.Cycle = controlInstructionStopCycle; - copper.Wait(first, second); + copper.Cycle = instruction.ControlStopCycle; + copper.Wait(instruction.First, instruction.Second); return; } - if (controlInstructionStopCycle <= frameStopCycle && + if (instruction.ControlStopCycle <= frameStopCycle && IsCopperComparisonSatisfied( - first, - second, + instruction.First, + instruction.Second, frameStartCycle, - controlInstructionStopCycle, - IsCopperBlitterFinishedForWait(second))) + instruction.ControlStopCycle, + IsCopperBlitterFinishedForWait(instruction.Second))) { copper.SuppressNextMove = true; } - copper.Cycle = controlInstructionStopCycle; + copper.Cycle = instruction.ControlStopCycle; + } + + private CopperInstructionLatch LoadPresentationCopperInstruction(uint pc, long fetchCycle) + { + var first = ReadCopperWordForPresentation(pc, fetchCycle, out var firstAccess); + var secondRequestCycle = GetCopperSecondWordRequestCycle(firstAccess); + var second = ReadCopperWordForPresentation(AddDmaPointerOffset(pc, 2), secondRequestCycle, out var secondAccess); + return new CopperInstructionLatch(first, firstAccess, second, secondAccess); } private bool TryPeekPendingWrite(out PendingCustomWrite write) @@ -7609,7 +7688,7 @@ private void RecordTimelineDisplayWrite(long cycle, ushort offset, bool isCopper return; } - if (IsTimelineBitplanePointerWrite(offset)) + if (IsTimelineCopperJumpWrite(offset)) { return; } @@ -7698,6 +7777,11 @@ private static bool IsTimelineUnsafeDisplayWrite(ushort offset) return false; } + if (IsTimelineCopperJumpWrite(offset)) + { + return false; + } + if (IsTimelineBitplanePointerWrite(offset)) { return false; @@ -7729,6 +7813,12 @@ private static bool IsTimelineCopperPointerLatchWrite(ushort offset) return offset is 0x080 or 0x082 or 0x084 or 0x086; } + private static bool IsTimelineCopperJumpWrite(ushort offset) + { + offset = (ushort)(offset & 0x01FE); + return offset is 0x088 or 0x08A; + } + private static bool IsTimelineBitplanePointerWrite(ushort offset) { offset = (ushort)(offset & 0x01FE); @@ -11010,6 +11100,44 @@ public void StartFrom(uint pc) } } + private readonly struct CopperInstructionLatch + { + public CopperInstructionLatch( + ushort first, + AmigaBusAccessResult firstAccess, + ushort second, + AmigaBusAccessResult secondAccess) + { + First = first; + Second = second; + DataCycle = secondAccess.GrantedCycle; + MoveStopCycle = Math.Max( + secondAccess.CompletedCycle, + firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperMoveHpUnits)); + ControlStopCycle = Math.Max( + secondAccess.CompletedCycle, + firstAccess.RequestedCycle + CopperHpToCpuCycles(CopperSkipHpUnits)); + } + + public ushort First { get; } + + public ushort Second { get; } + + public long DataCycle { get; } + + public long MoveStopCycle { get; } + + public long ControlStopCycle { get; } + + public bool IsEnd => First == 0xFFFF && Second == 0xFFFE; + + public bool IsMove => (First & 1) == 0; + + public bool IsWait => (Second & 1) == 0; + + public ushort MoveRegister => (ushort)(First & 0x01FE); + } + private readonly struct PendingCustomWrite { public PendingCustomWrite(long cycle, ushort offset, ushort value, bool isCopper = false) diff --git a/CopperMod.Amiga/Paula.cs b/CopperMod.Amiga/Paula.cs index 4fbff30..e59b629 100644 --- a/CopperMod.Amiga/Paula.cs +++ b/CopperMod.Amiga/Paula.cs @@ -18,7 +18,7 @@ internal sealed class Paula private readonly PaulaTimelineState _audioTimeline = new PaulaTimelineState(); private readonly PaulaTimelineState _registerTimeline = new PaulaTimelineState(); private readonly List _pendingWrites = new List(); - private readonly List _dmaFetches = new List(); + private readonly PaulaDmaReadLatchQueue[] _dmaReadLatchQueues = CreateDmaReadLatchQueues(); private readonly List _pendingInterrupts = new List(); private readonly PaulaInterruptEvent[] _drainedInterruptBuffer = new PaulaInterruptEvent[MaxPendingInterruptEvents]; private readonly ReusableReadOnlyList _drainedInterrupts = new ReusableReadOnlyList(); @@ -35,6 +35,8 @@ internal sealed class Paula private float[][]? _captureSamples; private int _captureFrameIndex; private int _captureSampleRate; + private long _startDmaWordOutputCount; + private bool _hotCounterDiagnosticsEnabled; public Paula(AmigaBus bus) { @@ -47,6 +49,13 @@ public Paula(AmigaBus bus) public ushort Dmacon => _registerTimeline.Dmacon; + internal long StartDmaWordOutputCount => _startDmaWordOutputCount; + + internal bool HotCounterDiagnosticsEnabled + { + set => _hotCounterDiagnosticsEnabled = value; + } + public ushort Intena => _registerTimeline.Intena; public ushort Intreq => _registerTimeline.Intreq; @@ -68,7 +77,7 @@ public void Reset() { Array.Clear(_registerBytes); _pendingWrites.Clear(); - _dmaFetches.Clear(); + ResetDmaReadLatchQueues(); _pendingInterrupts.Clear(); _writes.Clear(); Array.Clear(_cpuInterruptReleaseCycles); @@ -82,6 +91,7 @@ public void Reset() _captureSamples = null; _captureFrameIndex = 0; _captureSampleRate = 0; + _startDmaWordOutputCount = 0; } public byte ReadByte(ushort offset) @@ -355,10 +365,19 @@ public void BeginChannelCapture(int frames, int sampleRate) return result; } - public void RenderSample(long targetCycle, Span destination, int frame, int channels) + public void RenderSample( + long targetCycle, + Span destination, + int frame, + int channels, + bool advanceRegisterObservable = true) { AdvanceAudioTo(targetCycle); - AdvanceRegisterObservableTo(targetCycle); + if (advanceRegisterObservable) + { + AdvanceRegisterObservableTo(targetCycle); + } + var left = 0.0f; var right = 0.0f; var capture = _captureSamples; @@ -465,7 +484,7 @@ private void AdvanceTimelineTo(PaulaTimelineState timeline, long targetCycle, Pa } CompactPendingWrites(); - CompactDmaFetches(); + CompactDmaReadLatches(); } private void ApplyRegisterWritesTo(long targetCycle) @@ -489,7 +508,7 @@ private void ApplyRegisterWritesTo(long targetCycle) RefreshCpuInterruptVisibility(targetCycle); InvalidateRegisterWakeCandidateCache(); CompactPendingWrites(); - CompactDmaFetches(); + CompactDmaReadLatches(); } private bool HasPendingWriteThrough(PaulaTimelineState timeline, long targetCycle) @@ -720,6 +739,11 @@ private void RefreshCpuInterruptVisibility(long cycle) { cycle = Math.Max(0, cycle); var active = ActiveInterruptBits; + if (active == _lastCpuActiveInterruptBits) + { + return; + } + var newlyActive = (ushort)(active & ~_lastCpuActiveInterruptBits); for (var bitIndex = 0; bitIndex < _cpuInterruptReleaseCycles.Length; bitIndex++) { @@ -874,15 +898,6 @@ private static long GetPeriodCycles(int period) return GetEffectivePeriod(period) * AmigaConstants.A500PalCpuCyclesPerColorClock; } - private static long GetDmaPeriodCycles(int period, AmigaBus bus) - { - var effectivePeriod = GetEffectivePeriod(period); - var dmaPeriod = UsesAudioDmaRefillMinimum(bus) - ? Math.Max(effectivePeriod, bus.AudioDmaMinimumPeriod) - : effectivePeriod; - return dmaPeriod * AmigaConstants.A500PalCpuCyclesPerColorClock; - } - private static long GetEffectivePeriod(int period) { if (period == 0) @@ -894,7 +909,7 @@ private static long GetEffectivePeriod(int period) return period; } - private static bool UsesAudioDmaRefillMinimum(AmigaBus bus) + private static bool UsesLiveAgnusAudioDma(AmigaBus bus) => bus.LiveAgnusDmaEnabled; private void InvalidateRegisterWakeCandidateCache() @@ -922,7 +937,9 @@ private long GetRegisterWakeCandidateCycle() for (var i = 0; i < _registerTimeline.Channels.Length; i++) { - var channelCandidate = _registerTimeline.Channels[i].GetNextWakeCandidateCycle(); + var channelCandidate = _registerTimeline.Channels[i].GetNextRegisterWakeCandidateCycle( + _registerTimeline, + this); if (channelCandidate.HasValue) { candidate = Math.Min(candidate, channelCandidate.Value); @@ -972,45 +989,44 @@ private void CompactPendingWrites() _registerTimeline.PendingWriteIndex -= consumed; } - private PaulaDmaFetchRecord GetOrCreateDmaFetchRecord(int channel, uint address, long requestedCycle, PaulaTimelineKind kind) + private PaulaDmaReadLatch GetOrCreateDmaReadLatch(int channel, uint address, long requestedCycle, PaulaTimelineKind kind) { address = _bus.MaskChipDmaAddress(address); - for (var i = 0; i < _dmaFetches.Count; i++) + var queue = _dmaReadLatchQueues[channel]; + if (queue.TryConsume(address, requestedCycle, kind, out var cachedLatch)) { - var fetch = _dmaFetches[i]; - if (fetch.Channel == channel && - fetch.Address == address && - fetch.RequestedCycle == requestedCycle && - !fetch.IsConsumed(kind)) - { - fetch.MarkConsumed(kind); - return fetch; - } + return cachedLatch; } - var read = _bus.ReadPaulaDmaWord(address, requestedCycle); - var record = new PaulaDmaFetchRecord(channel, address, requestedCycle, read); - record.MarkConsumed(kind); - _dmaFetches.Add(record); - return record; + var read = _bus.ReadPaulaDmaWord(channel, address, requestedCycle); + var latch = new PaulaDmaReadLatch(channel, address, requestedCycle, read); + queue.AddConsumed(latch, kind); + return latch; } - private void CompactDmaFetches() + private void CompactDmaReadLatches() { - if (_dmaFetches.Count < 128) + for (var i = 0; i < _dmaReadLatchQueues.Length; i++) { - return; + _dmaReadLatchQueues[i].CompactConsumedPrefix(); } + } - var removeCount = 0; - while (removeCount < _dmaFetches.Count && _dmaFetches[removeCount].ConsumedByBoth) + private void RecordStartDmaWordOutput() + { + if (!_hotCounterDiagnosticsEnabled) { - removeCount++; + return; } - if (removeCount != 0) + _startDmaWordOutputCount++; + } + + private void ResetDmaReadLatchQueues() + { + for (var i = 0; i < _dmaReadLatchQueues.Length; i++) { - _dmaFetches.RemoveRange(0, removeCount); + _dmaReadLatchQueues[i].Reset(); } } @@ -1073,23 +1089,155 @@ public void Reset() } } - private sealed class PaulaDmaFetchRecord + private static PaulaDmaReadLatchQueue[] CreateDmaReadLatchQueues() { - public PaulaDmaFetchRecord(int channel, uint address, long requestedCycle, PaulaDmaReadResult read) + var queues = new PaulaDmaReadLatchQueue[AmigaConstants.PaulaChannelCount]; + for (var i = 0; i < queues.Length; i++) { - Channel = channel; - Address = address; - RequestedCycle = requestedCycle; - Read = read; + queues[i] = new PaulaDmaReadLatchQueue(); } - public int Channel { get; } + return queues; + } - public uint Address { get; } + private sealed class PaulaDmaReadLatchQueue + { + private const int InitialCapacity = 16; + private PaulaDmaReadLatchRecord[] _records = new PaulaDmaReadLatchRecord[InitialCapacity]; + private int _start; + private int _count; - public long RequestedCycle { get; } + public void Reset() + { + _start = 0; + _count = 0; + } + + public bool TryConsume(uint address, long requestedCycle, PaulaTimelineKind kind, out PaulaDmaReadLatch latch) + { + CompactConsumedPrefix(); + if (_count == 0) + { + latch = default; + return false; + } + + if (TryConsumeAt(0, address, requestedCycle, kind, out latch)) + { + CompactConsumedPrefix(); + return true; + } + + if (_count == 1) + { + return false; + } + + var last = _count - 1; + if (TryConsumeAt(last, address, requestedCycle, kind, out latch)) + { + return true; + } + + if (requestedCycle > _records[IndexOf(last)].Latch.RequestedCycle) + { + return false; + } + + for (var i = 1; i < last; i++) + { + if (TryConsumeAt(i, address, requestedCycle, kind, out latch)) + { + return true; + } + } + + return false; + } + + public void AddConsumed(PaulaDmaReadLatch latch, PaulaTimelineKind kind) + { + CompactConsumedPrefix(); + EnsureCapacity(_count + 1); + ref var record = ref _records[IndexOf(_count)]; + record = new PaulaDmaReadLatchRecord(latch); + record.MarkConsumed(kind); + _count++; + } + + public void CompactConsumedPrefix() + { + while (_count != 0 && _records[_start].ConsumedByBoth) + { + _records[_start] = default; + _start++; + if (_start == _records.Length) + { + _start = 0; + } + + _count--; + } + + if (_count == 0) + { + _start = 0; + } + } + + private bool TryConsumeAt( + int logicalIndex, + uint address, + long requestedCycle, + PaulaTimelineKind kind, + out PaulaDmaReadLatch latch) + { + ref var record = ref _records[IndexOf(logicalIndex)]; + if (!record.Matches(address, requestedCycle) || record.IsConsumed(kind)) + { + latch = default; + return false; + } + + record.MarkConsumed(kind); + latch = record.Latch; + return true; + } + + private void EnsureCapacity(int required) + { + if (required <= _records.Length) + { + return; + } + + var next = new PaulaDmaReadLatchRecord[_records.Length * 2]; + for (var i = 0; i < _count; i++) + { + next[i] = _records[IndexOf(i)]; + } + + _records = next; + _start = 0; + } + + private int IndexOf(int logicalIndex) + { + var index = _start + logicalIndex; + return index < _records.Length ? index : index - _records.Length; + } + } + + private struct PaulaDmaReadLatchRecord + { + public PaulaDmaReadLatchRecord(PaulaDmaReadLatch latch) + { + Latch = latch; + AudioConsumed = false; + RegisterConsumed = false; + } - public PaulaDmaReadResult Read { get; } + public PaulaDmaReadLatch Latch { get; } public bool AudioConsumed { get; private set; } @@ -1097,6 +1245,9 @@ public PaulaDmaFetchRecord(int channel, uint address, long requestedCycle, Paula public bool ConsumedByBoth => AudioConsumed && RegisterConsumed; + public bool Matches(uint address, long requestedCycle) + => Latch.Address == address && Latch.RequestedCycle == requestedCycle; + public bool IsConsumed(PaulaTimelineKind kind) => kind == PaulaTimelineKind.Audio ? AudioConsumed : RegisterConsumed; @@ -1113,6 +1264,28 @@ public void MarkConsumed(PaulaTimelineKind kind) } } + private readonly struct PaulaDmaReadLatch + { + public PaulaDmaReadLatch(int channel, uint address, long requestedCycle, PaulaDmaReadResult read) + { + Channel = channel; + Address = address; + RequestedCycle = requestedCycle; + Value = read.Value; + LoadCycle = read.BusAccess.CompletedCycle; + } + + public int Channel { get; } + + public uint Address { get; } + + public long RequestedCycle { get; } + + public ushort Value { get; } + + public long LoadCycle { get; } + } + private readonly struct PendingWrite { public PendingWrite(long cycle, ushort offset, ushort value) @@ -1136,9 +1309,9 @@ private sealed class PaulaChannel private bool _nextByteIsLow; private long _nextSampleCycle; private long _nextDmaFetchCycle; - private ushort _prefetchedDmaWord; + private PaulaDmaReadLatch _prefetchedDmaLatch; private bool _hasPrefetchedDmaWord; - private ushort _pendingDmaWord; + private PaulaDmaReadLatch _pendingDmaLatch; private bool _hasPendingDmaWord; private long _pendingDmaLoadCycle; private long _pendingDmaNextFetchCycle; @@ -1180,9 +1353,9 @@ public void Reset() _nextByteIsLow = false; _nextSampleCycle = 0; _nextDmaFetchCycle = long.MaxValue; - _prefetchedDmaWord = 0; + _prefetchedDmaLatch = default; _hasPrefetchedDmaWord = false; - _pendingDmaWord = 0; + _pendingDmaLatch = default; _hasPendingDmaWord = false; _pendingDmaLoadCycle = long.MaxValue; _pendingDmaNextFetchCycle = long.MaxValue; @@ -1250,7 +1423,7 @@ public void AdvanceTo(long targetCycle, AmigaBus bus, Paula paula, PaulaTimeline if (_pendingDmaLoadCycle <= targetCycle && (!sampleDue || _pendingDmaLoadCycle <= _nextSampleCycle)) { - CompletePendingDmaWord(bus, paula, timeline, kind); + CompletePendingDmaWord(bus, paula, timeline, kind, targetCycle); continue; } @@ -1276,9 +1449,12 @@ public void AdvanceTo(long targetCycle, AmigaBus bus, Paula paula, PaulaTimeline _nextByteIsLow = false; paula.ApplyModulationFrom(timeline, Index, _dataWord); _nextSampleCycle += GetPeriodCycles(Period); - if (DmaEnabled && UsesAudioDmaRefillMinimum(bus)) + if (DmaEnabled && + UsesLiveAgnusAudioDma(bus) && + !_hasPrefetchedDmaWord && + _hasPendingDmaWord) { - _nextSampleCycle = Math.Max(_nextSampleCycle, _nextDmaFetchCycle); + _nextSampleCycle = Math.Max(_nextSampleCycle, _pendingDmaLoadCycle); } continue; @@ -1288,15 +1464,16 @@ public void AdvanceTo(long targetCycle, AmigaBus bus, Paula paula, PaulaTimeline { if (_hasPrefetchedDmaWord) { - var word = _prefetchedDmaWord; + var latch = _prefetchedDmaLatch; ClearPrefetchedDmaWord(); StartDmaWordOutput( - word, + latch, _nextSampleCycle, bus, paula, timeline, - kind); + kind, + targetCycle); continue; } @@ -1312,7 +1489,16 @@ public void AdvanceTo(long targetCycle, AmigaBus bus, Paula paula, PaulaTimeline forceInterrupt: false); } + if (_hasPrefetchedDmaWord) + { + continue; + } + _nextSampleCycle += GetPeriodCycles(Period); + if (UsesLiveAgnusAudioDma(bus) && _hasPendingDmaWord) + { + _nextSampleCycle = Math.Max(_nextSampleCycle, _pendingDmaLoadCycle); + } } else { @@ -1338,6 +1524,48 @@ public void AdvanceTo(long targetCycle, AmigaBus bus, Paula paula, PaulaTimeline return candidate; } + public long? GetNextRegisterWakeCandidateCycle(PaulaTimelineState timeline, Paula paula) + { + long? candidate = null; + if (_hasPendingDmaWord) + { + candidate = _pendingDmaLoadCycle; + } + + if (_hasDataWord && _nextByteIsLow && paula.IsAttachedSource(timeline, Index)) + { + candidate = MinWakeCandidate(candidate, _nextSampleCycle); + } + + if (DmaEnabled) + { + candidate = MinWakeCandidate(candidate, GetNextDmaRegisterBoundaryCycle()); + } + + return candidate; + } + + private long? GetNextDmaRegisterBoundaryCycle() + { + if (!_hasDataWord) + { + return _hasPrefetchedDmaWord ? _nextSampleCycle : null; + } + + if (!_nextByteIsLow) + { + return _nextSampleCycle; + } + + var nextWordCycle = _nextSampleCycle + GetPeriodCycles(Period); + if (_nextDmaFetchCycle != long.MaxValue) + { + nextWordCycle = Math.Max(nextWordCycle, _nextDmaFetchCycle); + } + + return nextWordCycle; + } + public PaulaChannelSnapshot GetSnapshot() { return new PaulaChannelSnapshot( @@ -1370,7 +1598,7 @@ private void RequestDmaWord( return; } - var requestCycle = UsesAudioDmaRefillMinimum(bus) + var requestCycle = UsesLiveAgnusAudioDma(bus) ? Math.Max(cycle, _nextDmaFetchCycle) : cycle; var interruptCount = 0; @@ -1381,14 +1609,13 @@ private void RequestDmaWord( interruptCount++; } - var dmaFetch = paula.GetOrCreateDmaFetchRecord(Index, _currentAddress, requestCycle, kind); - var dmaRead = dmaFetch.Read; + var dmaLatch = paula.GetOrCreateDmaReadLatch(Index, _currentAddress, requestCycle, kind); _currentAddress = bus.AddChipDmaPointerOffset(_currentAddress, 2); _remainingWords--; - _pendingDmaWord = dmaRead.Value; + _pendingDmaLatch = dmaLatch; _hasPendingDmaWord = true; - _pendingDmaLoadCycle = dmaRead.BusAccess.CompletedCycle; - _pendingDmaNextFetchCycle = requestCycle + (GetDmaPeriodCycles(Period, bus) * 2); + _pendingDmaLoadCycle = dmaLatch.LoadCycle; + _pendingDmaNextFetchCycle = requestCycle + (GetPeriodCycles(Period) * 2); _pendingDmaLoadTarget = loadTarget; _nextDmaFetchCycle = _pendingDmaNextFetchCycle; if (!_hasDataWord) @@ -1404,7 +1631,7 @@ private void RequestDmaWord( _pendingDmaInterruptCount = interruptCount; if (_pendingDmaLoadCycle <= cycle) { - CompletePendingDmaWord(bus, paula, timeline, kind); + CompletePendingDmaWord(bus, paula, timeline, kind, cycle); } } @@ -1412,11 +1639,12 @@ private void CompletePendingDmaWord( AmigaBus bus, Paula paula, PaulaTimelineState timeline, - PaulaTimelineKind kind) + PaulaTimelineKind kind, + long targetCycle) { var loadCycle = _pendingDmaLoadCycle; var interruptCount = _pendingDmaInterruptCount; - var word = _pendingDmaWord; + var latch = _pendingDmaLatch; var loadTarget = _pendingDmaLoadTarget; ClearPendingDmaWord(); for (var i = 0; i < interruptCount; i++) @@ -1436,42 +1664,106 @@ private void CompletePendingDmaWord( if (!_hasDataWord) { - StartDmaWordOutput(word, loadCycle, bus, paula, timeline, kind); + StartDmaWordOutput(latch, loadCycle, bus, paula, timeline, kind, targetCycle); return; } - _prefetchedDmaWord = word; + _prefetchedDmaLatch = latch; _hasPrefetchedDmaWord = true; } + private void StartDmaWordOutput( + PaulaDmaReadLatch latch, + long cycle, + AmigaBus bus, + Paula paula, + PaulaTimelineState timeline, + PaulaTimelineKind kind, + long targetCycle) + => StartDmaWordOutput(latch.Value, cycle, bus, paula, timeline, kind, targetCycle); + private void StartDmaWordOutput( ushort word, long cycle, AmigaBus bus, Paula paula, PaulaTimelineState timeline, - PaulaTimelineKind kind) + PaulaTimelineKind kind, + long targetCycle) { + paula.RecordStartDmaWordOutput(); _dataWord = word; _hasDataWord = true; - _nextByteIsLow = true; CurrentSample = unchecked((sbyte)(word >> 8)); - _nextSampleCycle = cycle + GetPeriodCycles(Period); + var periodCycles = GetPeriodCycles(Period); + if (kind == PaulaTimelineKind.Register && + TrySkipRegisterLowByteDmaOutput(cycle, bus, paula, timeline, periodCycles)) + { + return; + } + + _nextByteIsLow = true; + _nextSampleCycle = cycle + periodCycles; if (DmaEnabled) { RequestDmaWord(bus, cycle, paula, timeline, kind, DmaLoadTarget.Prefetch, forceInterrupt: false); } + + if (kind == PaulaTimelineKind.Audio && + _nextSampleCycle <= targetCycle && + !paula.IsAttachedSource(timeline, Index)) + { + CurrentSample = unchecked((sbyte)word); + _nextByteIsLow = false; + _nextSampleCycle += periodCycles; + if (DmaEnabled && + UsesLiveAgnusAudioDma(bus) && + !_hasPrefetchedDmaWord && + _hasPendingDmaWord) + { + _nextSampleCycle = Math.Max(_nextSampleCycle, _pendingDmaLoadCycle); + } + } + } + + private bool TrySkipRegisterLowByteDmaOutput( + long cycle, + AmigaBus bus, + Paula paula, + PaulaTimelineState timeline, + long periodCycles) + { + if (!DmaEnabled || paula.IsAttachedSource(timeline, Index)) + { + return false; + } + + var nextWordCycle = cycle + (periodCycles * 2); + if (paula.HasPendingWriteThrough(timeline, nextWordCycle)) + { + return false; + } + + _nextByteIsLow = false; + _nextSampleCycle = nextWordCycle; + RequestDmaWord(bus, cycle, paula, timeline, PaulaTimelineKind.Register, DmaLoadTarget.Prefetch, forceInterrupt: false); + if (UsesLiveAgnusAudioDma(bus) && !_hasPrefetchedDmaWord && _hasPendingDmaWord) + { + _nextSampleCycle = Math.Max(_nextSampleCycle, _pendingDmaLoadCycle); + } + + return true; } private void ClearPrefetchedDmaWord() { - _prefetchedDmaWord = 0; + _prefetchedDmaLatch = default; _hasPrefetchedDmaWord = false; } private void ClearPendingDmaWord() { - _pendingDmaWord = 0; + _pendingDmaLatch = default; _hasPendingDmaWord = false; _pendingDmaLoadCycle = long.MaxValue; _pendingDmaNextFetchCycle = long.MaxValue; From 01cd5f6b07ab2f42d6a48d0d6948af8ba12166af Mon Sep 17 00:00:00 2001 From: Ilkka Lehtoranta Date: Wed, 24 Jun 2026 14:49:55 +0300 Subject: [PATCH 2/2] Add bitplane DMA latch path --- .../AmigaBitplaneConformanceMatrixTests.cs | 40 +++++++ CopperMod.Amiga/OcsDisplay.cs | 105 +++++++++++++++--- 2 files changed, 129 insertions(+), 16 deletions(-) diff --git a/CopperMod.Amiga.Tests/AmigaBitplaneConformanceMatrixTests.cs b/CopperMod.Amiga.Tests/AmigaBitplaneConformanceMatrixTests.cs index 2e538b6..d5839d0 100644 --- a/CopperMod.Amiga.Tests/AmigaBitplaneConformanceMatrixTests.cs +++ b/CopperMod.Amiga.Tests/AmigaBitplaneConformanceMatrixTests.cs @@ -113,6 +113,9 @@ public void ExecutableBitplaneMatrixRowsPass(object rowObject) case "cycle-exact fetch slot contention at DDF edges": BitplaneFetchesUseDisplayDmaSlots(); break; + case "bitplane DMA latch is consumed after granted fetch": + BitplaneDmaLatchIsConsumedAfterGrantedFetch(); + break; case "HRM lowres bitplane slot order": BitplaneFetchesUseHrmLowResSlotOrder(); break; @@ -186,6 +189,7 @@ public void PendingBitplaneRowsDocumentTheirReason(object rowObject) Executable("palette", "live DMA capture preserves same-line Copper palette spans"), Executable("dma-control", "DMAEN and BPLEN gate timed bitplane fetches"), Executable("fetch-window", "cycle-exact fetch slot contention at DDF edges"), + Executable("fetch-window", "bitplane DMA latch is consumed after granted fetch"), Executable("fetch-window", "HRM lowres bitplane slot order"), Executable("fetch-window", "HRM hires bitplane slot order"), Executable("dma-control", "bitplane DMA starvation of late sprite slots"), @@ -1056,6 +1060,31 @@ private static void BitplaneFetchesUseDisplayDmaSlots() Assert.Equal(0xFFFF0000u, Pixel(frame, StandardX, StandardY)); } + private static void BitplaneDmaLatchIsConsumedAfterGrantedFetch() + { + var bus = CreateDisplayBus(); + SetBitplanePointer(bus, 0, 0x1000); + for (var offset = 0; offset < 0x400; offset += 2) + { + BigEndian.WriteUInt16(bus.ChipRam, 0x1000 + offset, 0x8000); + } + + bus.WriteWord(0x00DFF092, 0x0038); + bus.WriteWord(0x00DFF094, 0x0038); + bus.WriteWord(0x00DFF096, 0x8300); + bus.WriteWord(0x00DFF100, 0x1000); + var frame = new uint[AmigaConstants.PalLowResWidth * AmigaConstants.PalLowResHeight]; + + bus.Display.RenderFrame(frame, 0, FrameCycles()); + + var latch = GetPrivateField(bus.Display, "_bitplaneDmaReadLatch"); + var hasValue = (bool)latch.GetType().GetProperty("HasValue")!.GetValue(latch)!; + var snapshot = bus.Display.CaptureSnapshot(); + Assert.False(hasValue); + Assert.True(snapshot.LastBitplaneDmaFetches > 0); + Assert.Equal(0xFFFF0000u, Pixel(frame, StandardX, StandardY)); + } + private static void LowResDdfStopSecondHalfIncludesContainingFetchBlock() { var bus = CreateDisplayBus(); @@ -1563,6 +1592,17 @@ private static void SetPrivateField(object target, string name, object value) field.SetValue(target, value); } + private static T GetPrivateField(object target, string name) + { + var field = target.GetType().GetField( + name, + System.Reflection.BindingFlags.Instance | + System.Reflection.BindingFlags.Public | + System.Reflection.BindingFlags.NonPublic); + Assert.NotNull(field); + return Assert.IsAssignableFrom(field.GetValue(target)); + } + private static int OutputXForHorizontal(int horizontal) { return Math.Clamp((horizontal - 0x38) * 2, 0, AmigaConstants.PalLowResWidth); diff --git a/CopperMod.Amiga/OcsDisplay.cs b/CopperMod.Amiga/OcsDisplay.cs index e132c42..bd803a7 100644 --- a/CopperMod.Amiga/OcsDisplay.cs +++ b/CopperMod.Amiga/OcsDisplay.cs @@ -70,6 +70,7 @@ internal sealed class OcsDisplay private readonly bool[] _renderPlaneHasRow = new bool[6]; private readonly ushort[] _bitplaneDataRegisters = new ushort[6]; private readonly bool[] _bitplaneDataRegisterWritten = new bool[6]; + private BitplaneDmaReadLatch _bitplaneDmaReadLatch; private readonly SpriteState[] _sprites = new SpriteState[8]; private readonly List _spriteFrameCommands = new List(MaxSpriteFrameCommands * 8); private readonly List[] _spriteCommandScratch = new List[8]; @@ -3948,26 +3949,19 @@ private bool CaptureKnownLiveBitplaneFetchesThrough(long targetCycle) private void CaptureLiveBitplaneFetch(int row, int plane, int word, long fetchCycle, LiveLineState state) { - ushort value = 0; - var granted = false; + BitplaneDmaReadLatch latch; if ((state.PlaneHasRowMask & (1 << plane)) != 0) { var address = unchecked(state.BitplaneRowAddresses[plane] + (uint)(word * 2)); - if (_bus.TryReadLiveBitplaneDmaWord(address, fetchCycle, out value, out var grantedCycle)) - { - _liveBitplaneDmaFetches++; - RecordLiveDisplayDmaCycle(grantedCycle); - granted = true; - } + latch = LoadLiveBitplaneDmaLatch(row, plane, word, address, fetchCycle); } - - _liveBitplaneWords[GetLiveBitplaneWordIndex(row, plane, word)] = value; - _liveBitplaneWordMasks[GetLiveBitplaneMaskIndex(row, plane)] |= 1UL << word; - if (!_liveTimelineUnsafeForFrame) + else { - _displayTimeline.RecordBitplaneFetch(row, plane, word, value, granted); + latch = BitplaneDmaReadLatch.Denied(row, plane, word, fetchCycle); } - _liveFetchBatchWordCount++; + + _bitplaneDmaReadLatch = latch; + ConsumeLiveBitplaneDmaLatch(ref _bitplaneDmaReadLatch); } private void CaptureLiveBitplaneFetch(long fetchCycle) @@ -6748,9 +6742,57 @@ private ushort ReadBitplaneWordForPresentation(uint address, int row, int plane, return 0; } + _bitplaneDmaReadLatch = new BitplaneDmaReadLatch(row, plane, word, value, granted: true, access.GrantedCycle); + return ConsumePresentationBitplaneDmaLatch(ref _bitplaneDmaReadLatch); + } + + private BitplaneDmaReadLatch LoadLiveBitplaneDmaLatch(int row, int plane, int word, uint address, long fetchCycle) + { + return _bus.TryReadLiveBitplaneDmaWord(address, fetchCycle, out var value, out var grantedCycle) + ? new BitplaneDmaReadLatch(row, plane, word, value, granted: true, grantedCycle) + : BitplaneDmaReadLatch.Denied(row, plane, word, grantedCycle); + } + + private void ConsumeLiveBitplaneDmaLatch(ref BitplaneDmaReadLatch latch) + { + if (!latch.HasValue) + { + return; + } + + var row = latch.Row; + var plane = latch.Plane; + var word = latch.Word; + _liveBitplaneWords[GetLiveBitplaneWordIndex(row, plane, word)] = latch.Value; + _liveBitplaneWordMasks[GetLiveBitplaneMaskIndex(row, plane)] |= 1UL << word; + if (latch.Granted) + { + _liveBitplaneDmaFetches++; + RecordLiveDisplayDmaCycle(latch.GrantedCycle); + } + + if (!_liveTimelineUnsafeForFrame) + { + _displayTimeline.RecordBitplaneFetch(row, plane, word, latch.Value, latch.Granted); + } + + _liveFetchBatchWordCount++; + latch = default; + } + + private ushort ConsumePresentationBitplaneDmaLatch(ref BitplaneDmaReadLatch latch) + { + if (!latch.HasValue || !latch.Granted) + { + latch = default; + return 0; + } + _lastBitplaneDmaFetches++; - RecordDisplayDmaCycle(access.GrantedCycle); - LoadBitplaneDataRegister(plane, value); + RecordDisplayDmaCycle(latch.GrantedCycle); + LoadBitplaneDataRegister(latch.Plane, latch.Value); + var value = latch.Value; + latch = default; return value; } @@ -11242,6 +11284,37 @@ public bool HasSameRenderingAs(SpriteFrameCommand other) } } + private readonly struct BitplaneDmaReadLatch + { + public BitplaneDmaReadLatch(int row, int plane, int word, ushort value, bool granted, long grantedCycle) + { + Row = row; + Plane = plane; + Word = word; + Value = value; + Granted = granted; + GrantedCycle = grantedCycle; + HasValue = true; + } + + public static BitplaneDmaReadLatch Denied(int row, int plane, int word, long grantedCycle) + => new BitplaneDmaReadLatch(row, plane, word, 0, granted: false, grantedCycle); + + public int Row { get; } + + public int Plane { get; } + + public int Word { get; } + + public ushort Value { get; } + + public bool Granted { get; } + + public long GrantedCycle { get; } + + public bool HasValue { get; } + } + private sealed class LiveLineState { public int Generation;