diff --git a/Copper68k/M68020Interpreter.cs b/Copper68k/M68020Interpreter.cs index 1c6281b..f1fff5f 100644 --- a/Copper68k/M68020Interpreter.cs +++ b/Copper68k/M68020Interpreter.cs @@ -3710,7 +3710,9 @@ private void ExecuteByteBranch(ushort opcode) if (CheckCondition(condition)) { - State.ProgramCounter = unchecked((uint)(branchBase + displacement)); + var target = unchecked((uint)(branchBase + displacement)); + _instructionFrequency.RecordTakenBranch(State.LastInstructionProgramCounter, opcode, target, 2); + State.ProgramCounter = target; CompleteTiming(M68kInstructionTimingKey.BranchByteTaken); return; } @@ -3995,7 +3997,9 @@ private void ExecuteWordBranch(ushort opcode) if (CheckCondition(condition)) { - State.ProgramCounter = unchecked((uint)(branchBase + displacement)); + var target = unchecked((uint)(branchBase + displacement)); + _instructionFrequency.RecordTakenBranch(State.LastInstructionProgramCounter, opcode, target, 4); + State.ProgramCounter = target; CompleteTiming(M68kInstructionTimingKey.BranchWordTaken); return; } @@ -4022,7 +4026,9 @@ private void ExecuteLongBranch(ushort opcode) if (CheckCondition(condition)) { - State.ProgramCounter = unchecked((uint)(branchBase + displacement)); + var target = unchecked((uint)(branchBase + displacement)); + _instructionFrequency.RecordTakenBranch(State.LastInstructionProgramCounter, opcode, target, 6); + State.ProgramCounter = target; CompleteTiming(M68kInstructionTimingKey.BranchLongTaken); return; } @@ -4058,7 +4064,9 @@ private void ExecuteDbcc(ushort opcode) State.D[register] = (State.D[register] & 0xFFFF_0000u) | counter; if (counter != 0xFFFF) { - State.ProgramCounter = unchecked((uint)(branchBase + displacement)); + var target = unchecked((uint)(branchBase + displacement)); + _instructionFrequency.RecordTakenBranch(State.LastInstructionProgramCounter, opcode, target, 4); + State.ProgramCounter = target; CompleteTiming(M68kInstructionTimingKey.DbccBranchTaken); return; } @@ -4070,7 +4078,7 @@ protected void BeginInstruction(ushort opcode) { State.LastInstructionProgramCounter = State.ProgramCounter; State.LastOpcode = opcode; - _instructionFrequency.Record(opcode); + _instructionFrequency.Record(State.LastInstructionProgramCounter, opcode); } internal virtual void RaiseFormat0Exception(int vector, uint stackedProgramCounter, M68kInstructionTimingKey timingKey) diff --git a/Copper68k/M68kCore.cs b/Copper68k/M68kCore.cs index ebbc9fa..fef07d1 100644 --- a/Copper68k/M68kCore.cs +++ b/Copper68k/M68kCore.cs @@ -1082,10 +1082,7 @@ public int ExecuteInstruction() var opcode = FetchWord(); State.LastOpcode = opcode; State.LastInstructionProgramCounter = instructionPc; - if (_instructionFrequency.Enabled) - { - _instructionFrequency.Record(opcode); - } + _instructionFrequency.Record(instructionPc, opcode); var decoded = DecodeByOpcodeLine(opcode, instructionPc); if (decoded) @@ -1283,7 +1280,9 @@ private bool DecodeBranch(ushort opcode, uint instructionPc) var carryOrZero = (State.StatusRegister & (M68kCpuState.Carry | M68kCpuState.Zero)) != 0; if (condition == 2 ? !carryOrZero : carryOrZero) { - State.ProgramCounter = (uint)(branchBase + offset); + var target = (uint)(branchBase + offset); + _instructionFrequency.RecordTakenBranch(instructionPc, opcode, target, displacement == 0 ? 4 : 2); + State.ProgramCounter = target; AddCycles(displacement == 0 ? 10 : 10); } else @@ -1299,7 +1298,9 @@ private bool DecodeBranch(ushort opcode, uint instructionPc) { if ((State.StatusRegister & M68kCpuState.Zero) == 0) { - State.ProgramCounter = (uint)(branchBase + offset); + var target = (uint)(branchBase + offset); + _instructionFrequency.RecordTakenBranch(instructionPc, opcode, target, displacement == 0 ? 4 : 2); + State.ProgramCounter = target; AddCycles(displacement == 0 ? 10 : 10); } else @@ -1315,7 +1316,9 @@ private bool DecodeBranch(ushort opcode, uint instructionPc) { if ((State.StatusRegister & M68kCpuState.Zero) != 0) { - State.ProgramCounter = (uint)(branchBase + offset); + var target = (uint)(branchBase + offset); + _instructionFrequency.RecordTakenBranch(instructionPc, opcode, target, displacement == 0 ? 4 : 2); + State.ProgramCounter = target; AddCycles(displacement == 0 ? 10 : 10); } else @@ -1329,7 +1332,9 @@ private bool DecodeBranch(ushort opcode, uint instructionPc) if (CheckCondition(condition)) { - State.ProgramCounter = (uint)(branchBase + offset); + var target = (uint)(branchBase + offset); + _instructionFrequency.RecordTakenBranch(instructionPc, opcode, target, displacement == 0 ? 4 : 2); + State.ProgramCounter = target; AddCycles(displacement == 0 ? 10 : 10); } else @@ -1847,7 +1852,9 @@ private bool DecodeLine5(ushort opcode) State.D[reg] = (State.D[reg] & 0xFFFF_0000) | counter; if (counter != 0xFFFF) { - State.ProgramCounter = (uint)(branchBase + displacement); + var target = (uint)(branchBase + displacement); + _instructionFrequency.RecordTakenBranch(State.LastInstructionProgramCounter, opcode, target, 4); + State.ProgramCounter = target; AddCycles(10); } else diff --git a/Copper68k/M68kInstructionFrequency.cs b/Copper68k/M68kInstructionFrequency.cs index 51c929f..972031a 100644 --- a/Copper68k/M68kInstructionFrequency.cs +++ b/Copper68k/M68kInstructionFrequency.cs @@ -57,17 +57,41 @@ internal readonly record struct M68kOpcodeFrequency( string JitTargetName, long Count); + internal readonly record struct M68kPcFrequency( + uint ProgramCounter, + ushort Opcode, + string Mnemonic, + M68kInstructionFamily Family, + string FamilyName, + M68kJitTarget JitTarget, + string JitTargetName, + long Count); + + internal readonly record struct M68kHotLoopFrequency( + uint StartProgramCounter, + uint EndProgramCounter, + uint BranchProgramCounter, + uint TargetProgramCounter, + ushort BranchOpcode, + string BranchMnemonic, + int ByteLength, + long Count); + internal readonly record struct M68kInstructionFrequencySnapshot( long TotalInstructions, IReadOnlyList Families, IReadOnlyList JitTargets, - IReadOnlyList Opcodes) + IReadOnlyList Opcodes, + IReadOnlyList HotPcs, + IReadOnlyList HotLoops) { public static M68kInstructionFrequencySnapshot Empty { get; } = new( 0, Array.Empty(), Array.Empty(), - Array.Empty()); + Array.Empty(), + Array.Empty(), + Array.Empty()); } internal interface IM68kInstructionFrequencyProvider @@ -81,14 +105,20 @@ internal interface IM68kInstructionFrequencyProvider internal sealed class M68kInstructionFrequencyMatrix { + private const int MaxHotLoopByteLength = 4096; private readonly long[] _familyCounts = new long[Enum.GetValues().Length]; private readonly long[] _jitTargetCounts = new long[Enum.GetValues().Length]; private readonly Dictionary _opcodeCounts = new Dictionary(); + private readonly Dictionary _pcCounts = new Dictionary(); + private readonly Dictionary _hotLoopCounts = new Dictionary(); private long _totalInstructions; public bool Enabled { get; set; } public void Record(ushort opcode) + => Record(0, opcode); + + public void Record(uint programCounter, ushort opcode) { if (!Enabled) { @@ -101,9 +131,50 @@ public void Record(ushort opcode) _jitTargetCounts[(int)jitTarget]++; _opcodeCounts.TryGetValue(opcode, out var count); _opcodeCounts[opcode] = count + 1; + var pcKey = CreatePcKey(programCounter, opcode); + _pcCounts.TryGetValue(pcKey, out var pcCount); + _pcCounts[pcKey] = pcCount + 1; _totalInstructions++; } + public void RecordTakenBranch( + uint branchProgramCounter, + ushort branchOpcode, + uint targetProgramCounter, + int instructionByteLength) + { + if (!Enabled || + instructionByteLength <= 0 || + targetProgramCounter > branchProgramCounter) + { + return; + } + + var endProgramCounter = unchecked(branchProgramCounter + (uint)instructionByteLength); + if (endProgramCounter < branchProgramCounter || + targetProgramCounter >= endProgramCounter) + { + return; + } + + var byteLength = endProgramCounter - targetProgramCounter; + if (byteLength == 0 || + byteLength > MaxHotLoopByteLength) + { + return; + } + + var key = new HotLoopKey( + targetProgramCounter, + endProgramCounter, + branchProgramCounter, + targetProgramCounter, + branchOpcode, + (int)byteLength); + _hotLoopCounts.TryGetValue(key, out var count); + _hotLoopCounts[key] = count + 1; + } + public M68kInstructionFrequencySnapshot CaptureSnapshot() { if (_totalInstructions == 0) @@ -150,7 +221,45 @@ public M68kInstructionFrequencySnapshot CaptureSnapshot() .ThenBy(entry => entry.Opcode) .ToArray(); - return new M68kInstructionFrequencySnapshot(_totalInstructions, families, jitTargets, opcodes); + var hotPcs = _pcCounts + .Select(entry => + { + var programCounter = GetPcFromKey(entry.Key); + var opcode = GetOpcodeFromKey(entry.Key); + var family = M68kInstructionClassifier.GetFamily(opcode); + var jitTarget = M68kInstructionClassifier.GetJitTarget(opcode); + return new M68kPcFrequency( + programCounter, + opcode, + M68kInstructionClassifier.GetMnemonic(opcode), + family, + M68kInstructionClassifier.GetFamilyName(family), + jitTarget, + M68kInstructionClassifier.GetJitTargetName(jitTarget), + entry.Value); + }) + .OrderByDescending(entry => entry.Count) + .ThenBy(entry => entry.ProgramCounter) + .ThenBy(entry => entry.Opcode) + .ToArray(); + + var hotLoops = _hotLoopCounts + .Select(entry => new M68kHotLoopFrequency( + entry.Key.StartProgramCounter, + entry.Key.EndProgramCounter, + entry.Key.BranchProgramCounter, + entry.Key.TargetProgramCounter, + entry.Key.BranchOpcode, + M68kInstructionClassifier.GetMnemonic(entry.Key.BranchOpcode), + entry.Key.ByteLength, + entry.Value)) + .OrderByDescending(entry => entry.Count) + .ThenBy(entry => entry.StartProgramCounter) + .ThenBy(entry => entry.BranchProgramCounter) + .ThenBy(entry => entry.BranchOpcode) + .ToArray(); + + return new M68kInstructionFrequencySnapshot(_totalInstructions, families, jitTargets, opcodes, hotPcs, hotLoops); } public void Reset() @@ -158,8 +267,27 @@ public void Reset() Array.Clear(_familyCounts); Array.Clear(_jitTargetCounts); _opcodeCounts.Clear(); + _pcCounts.Clear(); + _hotLoopCounts.Clear(); _totalInstructions = 0; } + + private static ulong CreatePcKey(uint programCounter, ushort opcode) + => ((ulong)programCounter << 16) | opcode; + + private static uint GetPcFromKey(ulong key) + => (uint)(key >> 16); + + private static ushort GetOpcodeFromKey(ulong key) + => (ushort)key; + + private readonly record struct HotLoopKey( + uint StartProgramCounter, + uint EndProgramCounter, + uint BranchProgramCounter, + uint TargetProgramCounter, + ushort BranchOpcode, + int ByteLength); } internal static class M68kInstructionClassifier diff --git a/Copper68k/M68kJitCore.cs b/Copper68k/M68kJitCore.cs index a572d94..6af6e95 100644 --- a/Copper68k/M68kJitCore.cs +++ b/Copper68k/M68kJitCore.cs @@ -7788,7 +7788,7 @@ private bool BeginCompiledInstruction( State.LastOpcode = expectedOpcode; State.LastInstructionProgramCounter = programCounter; State.ProgramCounter = Normalize(nextProgramCounter); - _instructionFrequency.Record(expectedOpcode); + _instructionFrequency.Record(programCounter, expectedOpcode); _compiledInstructionCycleFloorActive = true; return true; } diff --git a/CopperMod.Amiga.Tests/M68kInstructionFrequencyTests.cs b/CopperMod.Amiga.Tests/M68kInstructionFrequencyTests.cs index 7650283..bf4fe44 100644 --- a/CopperMod.Amiga.Tests/M68kInstructionFrequencyTests.cs +++ b/CopperMod.Amiga.Tests/M68kInstructionFrequencyTests.cs @@ -32,6 +32,8 @@ public void InterpreterFrequencyCapturesFamilyAndOpcodeCounts() Assert.Contains(snapshot.Families, family => family.Family == M68kInstructionFamily.Move && family.Count == 1); Assert.Contains(snapshot.Families, family => family.Family == M68kInstructionFamily.CompareMemory && family.Count == 1); Assert.Contains(snapshot.Opcodes, opcode => opcode.Opcode == 0xB308 && opcode.Mnemonic == "CMPM" && opcode.Count == 1); + Assert.Contains(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1000 && pc.Opcode == 0x7001 && pc.Count == 1); + Assert.Contains(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1002 && pc.Opcode == 0xB308 && pc.Count == 1); } [Fact] @@ -89,6 +91,76 @@ public void InstructionFrequencyCanBeResetBetweenBenchmarkPhases() Assert.Equal(1, snapshot.TotalInstructions); Assert.Contains(snapshot.Opcodes, opcode => opcode.Opcode == 0x7202 && opcode.Count == 1); Assert.DoesNotContain(snapshot.Opcodes, opcode => opcode.Opcode == 0x7001); + Assert.Contains(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1002 && pc.Opcode == 0x7202 && pc.Count == 1); + Assert.DoesNotContain(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1000 && pc.Opcode == 0x7001); + } + + [Fact] + public void InterpreterFrequencyCapturesBackwardBranchHotLoopBlocks() + { + var bus = new AmigaBus(); + WriteWords( + bus, + 0x1000, + 0x7002, // MOVEQ #2,D0 + 0x51C8, // DBRA D0,loop + 0xFFFE, // loop target is the DBRA opcode + 0x4E71); // NOP after expiry + var cpu = new M68kInterpreter(bus); + cpu.Reset(0x1000, 0x4000); + cpu.InstructionFrequencyEnabled = true; + + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + + var snapshot = cpu.CaptureInstructionFrequency(); + Assert.Equal(4, snapshot.TotalInstructions); + Assert.Contains(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1002 && pc.Opcode == 0x51C8 && pc.Count == 3); + var loop = Assert.Single(snapshot.HotLoops); + Assert.Equal(0x1002u, loop.StartProgramCounter); + Assert.Equal(0x1006u, loop.EndProgramCounter); + Assert.Equal(0x1002u, loop.BranchProgramCounter); + Assert.Equal(0x1002u, loop.TargetProgramCounter); + Assert.Equal(0x51C8, loop.BranchOpcode); + Assert.Equal("DBcc", loop.BranchMnemonic); + Assert.Equal(4, loop.ByteLength); + Assert.Equal(2, loop.Count); + } + + [Fact] + public void TimedInterpreterFrequencyCapturesBackwardBranchHotLoopBlocks() + { + var bus = new AmigaBus(); + WriteWords( + bus, + 0x1000, + 0x7002, // MOVEQ #2,D0 + 0x51C8, // DBRA D0,loop + 0xFFFE, // loop target is the DBRA opcode + 0x4E71); // NOP after expiry + var cpu = new M68020Interpreter(bus); + cpu.Reset(0x1000, 0x4000); + cpu.InstructionFrequencyEnabled = true; + + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + cpu.ExecuteInstruction(); + + var snapshot = cpu.CaptureInstructionFrequency(); + Assert.Equal(4, snapshot.TotalInstructions); + Assert.Contains(snapshot.HotPcs, pc => pc.ProgramCounter == 0x1002 && pc.Opcode == 0x51C8 && pc.Count == 3); + var loop = Assert.Single(snapshot.HotLoops); + Assert.Equal(0x1002u, loop.StartProgramCounter); + Assert.Equal(0x1006u, loop.EndProgramCounter); + Assert.Equal(0x1002u, loop.BranchProgramCounter); + Assert.Equal(0x1002u, loop.TargetProgramCounter); + Assert.Equal(0x51C8, loop.BranchOpcode); + Assert.Equal("DBcc", loop.BranchMnemonic); + Assert.Equal(4, loop.ByteLength); + Assert.Equal(2, loop.Count); } [Fact] diff --git a/CopperMod.Amiga/AmigaDisk.cs b/CopperMod.Amiga/AmigaDisk.cs index 63daa56..2fddba5 100644 --- a/CopperMod.Amiga/AmigaDisk.cs +++ b/CopperMod.Amiga/AmigaDisk.cs @@ -933,10 +933,8 @@ public AmigaDiskController(AmigaBus bus, int connectedDriveCount = 1, bool enabl Drive1 = new AmigaFloppyDrive(); Drive2 = new AmigaFloppyDrive(); Drive3 = new AmigaFloppyDrive(); - _drives = new[] { Drive0, Drive1, Drive2, Drive3 }; - _streams = Enumerable.Range(0, MaxFloppyDriveCount) - .Select(_ => new DiskStreamState()) - .ToArray(); + _drives = [Drive0, Drive1, Drive2, Drive3]; + _streams = [.. Enumerable.Range(0, MaxFloppyDriveCount).Select(_ => new DiskStreamState())]; _traceRecorder = AmigaDiskTraceRecorder.IsEnvironmentEnabled() ? new AmigaDiskTraceRecorder() : null; diff --git a/CopperMod.Amiga/AmigaRasterlineScheduleCache.cs b/CopperMod.Amiga/AmigaRasterlineScheduleCache.cs index c96f0b6..40bb67f 100644 --- a/CopperMod.Amiga/AmigaRasterlineScheduleCache.cs +++ b/CopperMod.Amiga/AmigaRasterlineScheduleCache.cs @@ -61,8 +61,12 @@ public void InvalidateFrom(long cycle, AmigaHardwareEventMask mask) if (cycle <= _lineEndCycle) { - _valid = false; - _computedMask = AmigaHardwareEventMask.None; + var affectedMask = mask & InterruptPollReadMask; + if (affectedMask == AmigaHardwareEventMask.None) + return; + + _computedMask &= ~affectedMask; + _interruptPollCycleComputed = false; _invalidationCount++; } } diff --git a/CopperMod.Amiga/OcsDisplay.cs b/CopperMod.Amiga/OcsDisplay.cs index 94f246c..6ca22a0 100644 --- a/CopperMod.Amiga/OcsDisplay.cs +++ b/CopperMod.Amiga/OcsDisplay.cs @@ -56,6 +56,8 @@ internal sealed class OcsDisplay private const int MaxLiveRasterlinePlanEvents = 64; private static readonly int[] LowResBitplaneFetchSlotsByPlane = [7, 3, 5, 1, 6, 2]; private static readonly int[] HighResBitplaneFetchSlotsByPlane = [3, 1, 2, 0]; + private static readonly sbyte[] LowResBitplanePlanesByFetchSlot = [-1, 3, 5, 1, -1, 2, 4, 0]; + private static readonly sbyte[] HighResBitplanePlanesByFetchSlot = [3, 1, 2, 0]; private readonly AmigaBus _bus; private readonly bool _liveDmaEnabled; private readonly List _pendingWrites = new List(MaxPendingWrites); @@ -165,6 +167,9 @@ internal sealed class OcsDisplay private readonly bool[] _liveRasterlinePlanRowsTouched = new bool[LowResOutputHeight]; private readonly bool[] _liveRasterlinePlanRowsValid = new bool[LowResOutputHeight]; private readonly bool[] _liveRasterlinePlanRowsOverflowed = new bool[LowResOutputHeight]; + private readonly int[] _liveRasterlinePlanWakeSearchIndices = new int[LowResOutputHeight]; + private readonly bool[] _liveRasterlinePlanWakeSearchLineStateVisibility = new bool[LowResOutputHeight]; + private readonly long[] _liveRasterlinePlanWakeSearchCycles = new long[LowResOutputHeight]; private readonly LiveRasterlinePlanEvent[] _predictedRasterlinePlanEvents = new LiveRasterlinePlanEvent[LowResOutputHeight * MaxLiveRasterlinePlanEvents]; private readonly int[] _predictedRasterlinePlanEventCounts = new int[LowResOutputHeight]; private readonly LiveRasterlinePredictionStatus[] _predictedRasterlinePlanStatuses = new LiveRasterlinePredictionStatus[LowResOutputHeight]; @@ -186,6 +191,17 @@ internal sealed class OcsDisplay private long _liveNextDisplayEventCycle; private bool _liveNextWorkCycleValid; private long _liveNextWorkCycle; + private bool _liveDisplayWakeCandidateCacheValid; + private long _liveDisplayWakeCandidateCacheCurrentCycle; + private long _liveDisplayWakeCandidateCacheTargetCycle; + private long _liveDisplayWakeCandidateCacheCapturedThroughCycle; + private bool _liveDisplayWakeCandidateCacheHasValue; + private long _liveDisplayWakeCandidateCacheValue; + private bool _liveCopperWaitCycleValid; + private ushort _liveCopperWaitFirst; + private ushort _liveCopperWaitSecond; + private long _liveCopperWaitStartCycle; + private long _liveCopperWaitCycle; private long _liveCycle; private long _liveFrameStartCycle; private long _liveCapturedThroughCycle; @@ -496,6 +512,7 @@ private void InvalidateLiveWorkCycle() { _liveNextWorkCycleValid = false; _liveNextWorkCycle = long.MaxValue; + _liveDisplayWakeCandidateCacheValid = false; } private void CaptureLiveBitplaneDmaBeforeHrmGrant(long requestedCycle) @@ -1671,9 +1688,16 @@ private void InvalidateLiveDisplayEventCycle() { _liveNextDisplayEventValid = false; _liveNextDisplayEventCycle = long.MaxValue; + InvalidateLiveCopperWaitCycle(); InvalidateLiveWorkCycle(); } + private void InvalidateLiveCopperWaitCycle() + { + _liveCopperWaitCycleValid = false; + _liveCopperWaitCycle = long.MaxValue; + } + private long GetNextLiveDisplayEventCycle() { if (_liveNextDisplayEventValid) @@ -1701,6 +1725,9 @@ private void ResetLiveRasterlinePlan(bool resetDescriptorCounters = false) Array.Clear(_liveRasterlinePlanRowsTouched); Array.Clear(_liveRasterlinePlanRowsValid); Array.Clear(_liveRasterlinePlanRowsOverflowed); + Array.Clear(_liveRasterlinePlanWakeSearchIndices); + Array.Clear(_liveRasterlinePlanWakeSearchLineStateVisibility); + Array.Clear(_liveRasterlinePlanWakeSearchCycles); Array.Clear(_predictedRasterlinePlanEventCounts); Array.Clear(_predictedRasterlinePlanStatuses); Array.Clear(_liveRasterlineDmaDescriptors); @@ -1765,6 +1792,9 @@ private bool TryBeginLiveRasterlinePlanEvent(long cycle, int expectedRow) _liveRasterlinePlanRowsValid[row] = true; _liveRasterlinePlanRowsOverflowed[row] = false; _liveRasterlinePlanEventCounts[row] = 0; + _liveRasterlinePlanWakeSearchIndices[row] = 0; + _liveRasterlinePlanWakeSearchLineStateVisibility[row] = false; + _liveRasterlinePlanWakeSearchCycles[row] = 0; _predictedRasterlinePlanEventCounts[row] = 0; _predictedRasterlinePlanStatuses[row] = LiveRasterlinePredictionStatus.None; } @@ -1801,7 +1831,7 @@ private bool TryGetLiveRasterlinePlanRow(long cycle, out int row) return false; } - row = GetOutputRowForCycle(_liveFrameStartCycle, cycle); + row = (int)((cycle - _liveFrameStartCycle) / PalLineCycles) - StandardVStart; return (uint)row < (uint)LowResOutputHeight; } @@ -2283,21 +2313,50 @@ private bool TryGetRecordedLiveRasterlinePlanWakeCandidate( var count = Math.Min(_liveRasterlinePlanEventCounts[currentRow], MaxLiveRasterlinePlanEvents); var baseIndex = currentRow * MaxLiveRasterlinePlanEvents; var lineStateEventsAreWakeVisible = HasLiveLineStateWakeWork(); - for (var i = 0; i < count; i++) + var searchIndex = _liveRasterlinePlanWakeSearchIndices[currentRow]; + if (searchIndex > count || + currentCycle < _liveRasterlinePlanWakeSearchCycles[currentRow] || + lineStateEventsAreWakeVisible != _liveRasterlinePlanWakeSearchLineStateVisibility[currentRow]) + { + searchIndex = 0; + } + + while (searchIndex < count) { - var planEvent = _liveRasterlinePlanEvents[baseIndex + i]; + var planEvent = _liveRasterlinePlanEvents[baseIndex + searchIndex]; if (planEvent.Kind == LiveRasterlinePlanEventKind.LineStateCapture && !lineStateEventsAreWakeVisible) { + searchIndex++; continue; } var cycle = planEvent.Cycle; - if (cycle > currentCycle && cycle <= targetCycle) + if (cycle <= currentCycle) + { + searchIndex++; + continue; + } + + _liveRasterlinePlanWakeSearchIndices[currentRow] = searchIndex; + _liveRasterlinePlanWakeSearchLineStateVisibility[currentRow] = lineStateEventsAreWakeVisible; + _liveRasterlinePlanWakeSearchCycles[currentRow] = currentCycle; + if (cycle <= targetCycle) { candidate = cycle; return true; } + + return true; + } + + if (_liveRasterlinePlanWakeSearchIndices[currentRow] != count || + _liveRasterlinePlanWakeSearchLineStateVisibility[currentRow] != lineStateEventsAreWakeVisible || + _liveRasterlinePlanWakeSearchCycles[currentRow] != currentCycle) + { + _liveRasterlinePlanWakeSearchIndices[currentRow] = count; + _liveRasterlinePlanWakeSearchLineStateVisibility[currentRow] = lineStateEventsAreWakeVisible; + _liveRasterlinePlanWakeSearchCycles[currentRow] = currentCycle; } return true; @@ -2795,28 +2854,53 @@ private long GetNextLiveCopperBarrierCycle() { currentCycle = Math.Max(0, currentCycle); targetCycle = Math.Max(currentCycle, targetCycle); + if (_liveDisplayWakeCandidateCacheValid && + _liveDisplayWakeCandidateCacheCurrentCycle == currentCycle && + _liveDisplayWakeCandidateCacheTargetCycle == targetCycle && + _liveDisplayWakeCandidateCacheCapturedThroughCycle == _liveCapturedThroughCycle) + { + return _liveDisplayWakeCandidateCacheHasValue + ? _liveDisplayWakeCandidateCacheValue + : null; + } + if (!_liveDmaEnabled || !_liveFrameValid || !HasLiveDisplayWork() || targetCycle < currentCycle) { - return null; + return CacheLiveDisplayWakeCandidate(currentCycle, targetCycle, null); } if (TryGetRecordedLiveRasterlinePlanWakeCandidate(currentCycle, targetCycle, out var recordedCycle)) { - return recordedCycle == long.MaxValue - ? null + var candidate = recordedCycle == long.MaxValue + ? (long?)null : recordedCycle; + return CacheLiveDisplayWakeCandidate(currentCycle, targetCycle, candidate); } var nextCycle = GetNextLiveCpuVisibleWorkCycle(); if (nextCycle == long.MaxValue || nextCycle > targetCycle) { - return null; + return CacheLiveDisplayWakeCandidate(currentCycle, targetCycle, null); } - return nextCycle <= currentCycle ? currentCycle : nextCycle; + return CacheLiveDisplayWakeCandidate( + currentCycle, + targetCycle, + nextCycle <= currentCycle ? currentCycle : nextCycle); + } + + private long? CacheLiveDisplayWakeCandidate(long currentCycle, long targetCycle, long? candidate) + { + _liveDisplayWakeCandidateCacheCurrentCycle = currentCycle; + _liveDisplayWakeCandidateCacheTargetCycle = targetCycle; + _liveDisplayWakeCandidateCacheCapturedThroughCycle = _liveCapturedThroughCycle; + _liveDisplayWakeCandidateCacheHasValue = candidate.HasValue; + _liveDisplayWakeCandidateCacheValue = candidate.GetValueOrDefault(); + _liveDisplayWakeCandidateCacheValid = true; + return candidate; } private long GetNextLiveDisplayEventCycle(bool includeCopper) @@ -2938,6 +3022,12 @@ private long GetNextLiveCopperCycle(long targetCycle) if (_liveCopper.Waiting) { var blitterReadyCycle = GetCopperBlitterReadyCycle(_liveCopper.WaitSecond, _liveCopper.Cycle); + if (blitterReadyCycle <= _liveCopper.Cycle) + { + var cachedWaitCycle = GetCachedLiveCopperWaitCycle(); + return cachedWaitCycle <= targetCycle ? cachedWaitCycle : long.MaxValue; + } + if (!TryGetCopperWaitCycle( _liveCopper.WaitFirst, _liveCopper.WaitSecond, @@ -2956,6 +3046,34 @@ private long GetNextLiveCopperCycle(long targetCycle) return Math.Max(_liveCopper.Cycle, _liveFrameStartCycle); } + private long GetCachedLiveCopperWaitCycle() + { + if (_liveCopperWaitCycleValid && + _liveCopperWaitFirst == _liveCopper.WaitFirst && + _liveCopperWaitSecond == _liveCopper.WaitSecond && + _liveCopperWaitStartCycle == _liveCopper.Cycle) + { + return _liveCopperWaitCycle; + } + + var frameStopCycle = _liveFrameStartCycle + PalFrameCycles; + _liveCopperWaitFirst = _liveCopper.WaitFirst; + _liveCopperWaitSecond = _liveCopper.WaitSecond; + _liveCopperWaitStartCycle = _liveCopper.Cycle; + _liveCopperWaitCycle = TryGetCopperWaitCycle( + _liveCopper.WaitFirst, + _liveCopper.WaitSecond, + _liveFrameStartCycle, + _liveCopper.Cycle, + frameStopCycle, + blitterFinished: true, + out var waitCycle) + ? waitCycle + : long.MaxValue; + _liveCopperWaitCycleValid = true; + return _liveCopperWaitCycle; + } + private void StepLiveCopper(long targetCycle) { if (_liveCopper.PendingMove) @@ -2998,14 +3116,24 @@ private void StepLiveCopper(long targetCycle) } } - if (!TryGetCopperWaitCycle( - _liveCopper.WaitFirst, - _liveCopper.WaitSecond, - _liveFrameStartCycle, - _liveCopper.Cycle, - targetCycle + 1, - blitterFinished: true, - out var waitCycle)) + long waitCycle; + if (blitterReadyCycle <= _liveCopper.Cycle) + { + waitCycle = GetCachedLiveCopperWaitCycle(); + if (waitCycle > targetCycle) + { + _liveCopper.Cycle = targetCycle + 1; + return; + } + } + else if (!TryGetCopperWaitCycle( + _liveCopper.WaitFirst, + _liveCopper.WaitSecond, + _liveFrameStartCycle, + _liveCopper.Cycle, + targetCycle + 1, + blitterFinished: true, + out waitCycle)) { _liveCopper.Cycle = targetCycle + 1; return; @@ -3016,11 +3144,13 @@ private void StepLiveCopper(long targetCycle) { _liveCopper.Cycle = resumeCycle; _liveCopper.Waiting = false; + InvalidateLiveCopperWaitCycle(); return; } _liveCopper.Cycle = resumeCycle; _liveCopper.Waiting = false; + InvalidateLiveCopperWaitCycle(); return; } @@ -3562,16 +3692,57 @@ private void CaptureLiveBitplaneFetchBatch(long stopCycle) return; } - var state = _liveLineStates[_liveNextFetchRow]; - var fetchHorizontal = state.DataFetchStart + (_liveNextFetchWord * state.FetchSlotStride) + _liveNextFetchSlot; - var fetchCycle = state.LineStartCycle + ((long)fetchHorizontal * CopperHpCycles); - if (fetchCycle > stopCycle) + var row = _liveNextFetchRow; + var state = _liveLineStates[row]; + var planeCount = Math.Max(0, state.PlaneCount); + var fetchWords = state.FetchWords; + var fetchSlotStride = state.FetchSlotStride; + var dataFetchStart = state.DataFetchStart; + var lineStartCycle = state.LineStartCycle; + var word = _liveNextFetchWord; + var slot = _liveNextFetchSlot; + var advanced = false; + + while (word < fetchWords) { - return; + while (slot < fetchSlotStride) + { + if (!TryGetBitplanePlaneForFetchSlot(slot, planeCount, fetchSlotStride, out var plane)) + { + slot++; + continue; + } + + var fetchHorizontal = dataFetchStart + (word * fetchSlotStride) + slot; + var fetchCycle = lineStartCycle + ((long)fetchHorizontal * CopperHpCycles); + if (fetchCycle > stopCycle) + { + _liveNextFetchRow = row; + _liveNextFetchWord = word; + _liveNextFetchPlane = plane; + _liveNextFetchSlot = slot; + if (advanced) + { + InvalidateLiveWorkCycle(); + } + + return; + } + + CaptureLiveBitplaneFetch(row, plane, word, fetchCycle, state); + slot++; + advanced = true; + } + + slot = 0; + word++; } - CaptureLiveBitplaneFetch(_liveNextFetchRow, _liveNextFetchPlane, _liveNextFetchWord, fetchCycle, state); - AdvanceLiveFetchCursor(); + _liveNextFetchRow = row; + _liveNextFetchWord = word; + _liveNextFetchPlane = 0; + _liveNextFetchSlot = slot; + AdvanceLiveFetchToNextRow(advanceBitplanePointers: true); } } @@ -4199,10 +4370,11 @@ private TimelineRejectReason GetTimelineRejectReason( } var rowStop = GetTimelineRowStop(frameStartCycle, frameStopCycle); + var checkFrameStop = frameStopCycle < frameStartCycle + PalFrameCycles; for (var row = 0; row < rowStop; row++) { - var lineStart = GetOutputRowStartCycle(frameStartCycle, row); - if (lineStart >= frameStopCycle) + if (checkFrameStop && + GetOutputRowStartCycle(frameStartCycle, row) >= frameStopCycle) { break; } @@ -4239,6 +4411,7 @@ private TimelineRejectReason GetTimelineRejectReason( private bool IsTimelineSpriteCompleteForRendering(DisplayFrameTimeline timeline, long frameStartCycle, long frameStopCycle) { var rowStop = GetTimelineRowStop(frameStartCycle, frameStopCycle); + var checkFrameStop = frameStopCycle < frameStartCycle + PalFrameCycles; for (var spriteIndex = 0; spriteIndex < _sprites.Length; spriteIndex++) { var commands = GetTimelineSpriteFrameCommands(spriteIndex, timeline); @@ -4255,8 +4428,8 @@ private bool IsTimelineSpriteCompleteForRendering(DisplayFrameTimeline timeline, var yStop = Math.Min(Math.Min(sprite.YStop, rowStop), LowResOutputHeight); for (var y = yStart; y < yStop; y++) { - var lineStart = GetOutputRowStartCycle(frameStartCycle, y); - if (lineStart >= frameStopCycle) + if (checkFrameStop && + GetOutputRowStartCycle(frameStartCycle, y) >= frameStopCycle) { break; } @@ -4321,6 +4494,7 @@ private void CompleteTimelineSpriteFetchOutcomes( bool allowExactCompletionReads) { var rowStop = GetTimelineRowStop(frameStartCycle, frameStopCycle); + var checkFrameStop = frameStopCycle < frameStartCycle + PalFrameCycles; for (var spriteIndex = 0; spriteIndex < _sprites.Length; spriteIndex++) { var commands = GetTimelineSpriteFrameCommands(spriteIndex, timeline); @@ -4337,8 +4511,8 @@ private void CompleteTimelineSpriteFetchOutcomes( var yStop = Math.Min(Math.Min(sprite.YStop, rowStop), LowResOutputHeight); for (var y = yStart; y < yStop; y++) { - var lineStart = GetOutputRowStartCycle(frameStartCycle, y); - if (lineStart >= frameStopCycle) + if (checkFrameStop && + GetOutputRowStartCycle(frameStartCycle, y) >= frameStopCycle) { break; } @@ -6660,26 +6834,14 @@ private static int GetBitplaneFetchSlot(int plane, int fetchSlotStride) private static bool TryGetBitplanePlaneForFetchSlot(int slot, int planeCount, int fetchSlotStride, out int plane) { - if (fetchSlotStride <= 4) - { - for (var candidate = 0; candidate < planeCount && candidate < HighResBitplaneFetchSlotsByPlane.Length; candidate++) - { - if (HighResBitplaneFetchSlotsByPlane[candidate] == slot) - { - plane = candidate; - return true; - } - } - - plane = -1; - return false; - } - - for (var candidate = 0; candidate < planeCount && candidate < LowResBitplaneFetchSlotsByPlane.Length; candidate++) + var planesByFetchSlot = fetchSlotStride <= 4 + ? HighResBitplanePlanesByFetchSlot + : LowResBitplanePlanesByFetchSlot; + if ((uint)slot < (uint)planesByFetchSlot.Length) { - if (LowResBitplaneFetchSlotsByPlane[candidate] == slot) + plane = planesByFetchSlot[slot]; + if ((uint)plane < (uint)planeCount) { - plane = candidate; return true; } } diff --git a/CopperScreen.Benchmarks/Program.cs b/CopperScreen.Benchmarks/Program.cs index c1fb082..68b4c32 100644 --- a/CopperScreen.Benchmarks/Program.cs +++ b/CopperScreen.Benchmarks/Program.cs @@ -1,7 +1,6 @@ using System.Diagnostics; using System.Reflection; using System.Runtime.InteropServices; -using System.Buffers.Binary; using CopperMod.Amiga; using CopperScreen; @@ -217,7 +216,6 @@ static BenchmarkRunResult RunBenchmark(BenchmarkWorkload workload, BenchmarkOpti : M68kInstructionFrequencySnapshot.Empty; if (options.InstructionMatrix) { - WriteHotLoopDiagnostics(workload, emulator); emulator.SetInstructionFrequencyEnabled(false); } @@ -483,66 +481,24 @@ static void WriteInstructionMatrix(BenchmarkRunResult result, int topOpcodeCount Console.WriteLine( $"instruction-opcode\t{result.Workload.Name}\t{result.CpuBackend}\t0x{opcode.Opcode:X4}\t{opcode.Mnemonic}\t{opcode.FamilyName}\t{opcode.Count}\t{PercentText(opcode.Count, snapshot.TotalInstructions)}"); } -} - -static void WriteHotLoopDiagnostics(BenchmarkWorkload workload, CopperScreenEmulator emulator) -{ - var machine = GetMachine(emulator); - var bus = machine.Bus; - var a5 = machine.Cpu.State.A[5]; - var emitted = 0; - ScanHotLoopRegion(workload, "chip", bus.ChipRam, 0, bus, a5, ref emitted); - ScanHotLoopRegion(workload, "exp", bus.ExpansionRam, bus.ExpansionRamBase, bus, a5, ref emitted); - ScanHotLoopRegion(workload, "real", bus.RealFastRam, bus.RealFastRamBase, bus, a5, ref emitted); - if (emitted == 0) - { - Console.WriteLine($"hot-loop\t{workload.Name}\tnone"); - } -} -static void ScanHotLoopRegion( - BenchmarkWorkload workload, - string regionName, - byte[] region, - uint baseAddress, - AmigaBus bus, - uint a5, - ref int emitted) -{ - const int MaxHotLoopDiagnostics = 8; - if (region.Length < 18 || emitted >= MaxHotLoopDiagnostics) + foreach (var pc in snapshot.HotPcs.Take(Math.Max(0, topOpcodeCount))) { - return; + Console.WriteLine( + $"instruction-pc\t{result.Workload.Name}\t{result.CpuBackend}\t{FormatProgramCounter(pc.ProgramCounter)}\t0x{pc.Opcode:X4}\t{pc.Mnemonic}\t{pc.FamilyName}\t{pc.Count}\t{PercentText(pc.Count, snapshot.TotalInstructions)}"); } - for (var offset = 0; offset <= region.Length - 18 && emitted < MaxHotLoopDiagnostics; offset += 2) + foreach (var loop in snapshot.HotLoops.Take(Math.Max(0, topOpcodeCount))) { - var span = region.AsSpan(offset); - if (ReadWord(span, 0) != 0x202D || - ReadWord(span, 4) != 0x0280 || - ReadWord(span, 10) != 0xB0BC || - ReadWord(span, 16) != 0x66EE) - { - continue; - } - - var displacement = unchecked((short)ReadWord(span, 2)); - var andMask = ReadLong(span, 6); - var compareValue = ReadLong(span, 12); - var pc = baseAddress + (uint)offset; - var polledAddress = (uint)(a5 + displacement); - var polledValue = bus.ReadHostLong(polledAddress); Console.WriteLine( - $"hot-loop\t{workload.Name}\tpc=0x{pc:X6}\tregion={regionName}\ta5=0x{a5:X6}\td16={displacement}\taddr=0x{polledAddress:X6}\tvalue=0x{polledValue:X8}\tand=0x{andMask:X8}\tcmp=0x{compareValue:X8}"); - emitted++; + $"hot-loop-block\t{result.Workload.Name}\t{result.CpuBackend}\tstart={FormatProgramCounter(loop.StartProgramCounter)}\tend={FormatProgramCounter(loop.EndProgramCounter)}\tbranch={FormatProgramCounter(loop.BranchProgramCounter)}\ttarget={FormatProgramCounter(loop.TargetProgramCounter)}\top=0x{loop.BranchOpcode:X4}\t{loop.BranchMnemonic}\tbytes={loop.ByteLength}\tcount={loop.Count}\t{PercentText(loop.Count, snapshot.TotalInstructions)}"); } } -static ushort ReadWord(ReadOnlySpan span, int offset) - => BinaryPrimitives.ReadUInt16BigEndian(span.Slice(offset, 2)); - -static uint ReadLong(ReadOnlySpan span, int offset) - => BinaryPrimitives.ReadUInt32BigEndian(span.Slice(offset, 4)); +static string FormatProgramCounter(uint programCounter) + => programCounter <= 0x00FF_FFFFu + ? $"0x{programCounter:X6}" + : $"0x{programCounter:X8}"; static string PercentText(long value, long total) => total == 0 ? "0.00" : $"{(value * 100.0) / total:F2}";