From b783ef0f9b31cbd1d937ac04672523c75b246ac5 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 19:34:14 -0400 Subject: [PATCH 01/48] Metal Signed-off-by: Isaac Marovitz --- CMakeLists.txt | 29 +- core/cfg/option.h | 2 + core/deps/imgui/backends/imgui_impl_metal.h | 74 +++ core/deps/imgui/backends/imgui_impl_metal.mm | 592 ++++++++++++++++++ core/hw/pvr/Renderer_if.cpp | 6 + core/rend/metal/metal_buffer.h | 118 ++++ core/rend/metal/metal_buffer.mm | 28 + core/rend/metal/metal_context.h | 64 ++ core/rend/metal/metal_context.mm | 85 +++ core/rend/metal/metal_driver.h | 122 ++++ core/rend/metal/metal_pipeline.h | 274 +++++++++ core/rend/metal/metal_pipeline.mm | 325 ++++++++++ core/rend/metal/metal_renderer.h | 144 +++++ core/rend/metal/metal_renderer.mm | 611 +++++++++++++++++++ core/rend/metal/metal_shaders.h | 153 +++++ core/rend/metal/metal_shaders.mm | 572 +++++++++++++++++ core/rend/metal/metal_texture.h | 114 ++++ core/rend/metal/metal_texture.mm | 90 +++ core/rend/transform_matrix.h | 2 +- core/types.h | 4 + core/ui/gui.cpp | 16 +- core/ui/mainui.cpp | 4 +- core/wsi/switcher.cpp | 16 + 23 files changed, 3440 insertions(+), 5 deletions(-) create mode 100644 core/deps/imgui/backends/imgui_impl_metal.h create mode 100644 core/deps/imgui/backends/imgui_impl_metal.mm create mode 100644 core/rend/metal/metal_buffer.h create mode 100644 core/rend/metal/metal_buffer.mm create mode 100644 core/rend/metal/metal_context.h create mode 100644 core/rend/metal/metal_context.mm create mode 100644 core/rend/metal/metal_driver.h create mode 100644 core/rend/metal/metal_pipeline.h create mode 100644 core/rend/metal/metal_pipeline.mm create mode 100644 core/rend/metal/metal_renderer.h create mode 100644 core/rend/metal/metal_renderer.mm create mode 100644 core/rend/metal/metal_shaders.h create mode 100644 core/rend/metal/metal_shaders.mm create mode 100644 core/rend/metal/metal_texture.h create mode 100644 core/rend/metal/metal_texture.mm diff --git 
a/CMakeLists.txt b/CMakeLists.txt index bfe4d505af..954126f640 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,10 @@ else() project(flycast) endif() +if(APPLE) + enable_language(OBJC OBJCXX) +endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD" OR NINTENDO_SWITCH) @@ -62,6 +66,7 @@ option(USE_HOST_SDL "Use host SDL library" ${USE_HOST_SDL_DEFAULT}) option(USE_HOST_LIBCHDR "Use host libchdr" OFF) option(USE_OPENMP "Use OpenMP if available" ON) option(USE_VULKAN "Build with Vulkan support" ON) +option(USE_METAL "Build with Metal support" ON) option(USE_DX9 "Build with Direct3D 9 support" ON) option(USE_DX11 "Build with Direct3D 11 support" ON) option(LIBRETRO "Build libretro core" OFF) @@ -71,7 +76,7 @@ option(USE_ALSA "Build with ALSA support" ON) option(USE_LIBAO "Build with AO support" ON) option(USE_OSS "Build with OSS support" OFF) option(USE_PULSEAUDIO "Build with PulseAudio support" ON) -option(USE_BREAKPAD "Build and link with breakpad library" ON) +option(USE_BREAKPAD "Build and link with breakpad library" OFF) option(USE_LUA "Build with Lua support" ON) option(ENABLE_GDB_SERVER "Build with GDB debugging support" OFF) option(ENABLE_DC_PROFILER "Build with support for target machine (SH4) profiler" OFF) @@ -1511,6 +1516,28 @@ if(USE_VULKAN) endif() endif() +if(APPLE AND USE_METAL) + target_link_libraries(${PROJECT_NAME} PRIVATE "-framework Metal -framework QuartzCore -framework CoreGraphics") + + target_compile_definitions(${PROJECT_NAME} PRIVATE USE_METAL HAVE_METAL) + target_sources(${PROJECT_NAME} PRIVATE + core/rend/metal/metal_renderer.h + core/rend/metal/metal_renderer.mm + core/rend/metal/metal_context.h + core/rend/metal/metal_context.mm + core/rend/metal/metal_shaders.h + core/rend/metal/metal_shaders.mm + core/rend/metal/metal_texture.h + core/rend/metal/metal_texture.mm + core/rend/metal/metal_pipeline.h + core/rend/metal/metal_pipeline.mm + core/rend/metal/metal_buffer.h + 
core/rend/metal/metal_buffer.mm + core/rend/metal/metal_driver.h + core/deps/imgui/backends/imgui_impl_metal.h + core/deps/imgui/backends/imgui_impl_metal.mm) +endif() + if(WIN32 AND USE_DX9 AND NOT LIBRETRO AND NOT WINDOWS_STORE AND ("x86" IN_LIST ARCHITECTURE OR "x86_64" IN_LIST ARCHITECTURE)) set(REND_DX9_FILES core/rend/dx9/d3d_overlay.h diff --git a/core/cfg/option.h b/core/cfg/option.h index 9e8d8e6450..260247ba4b 100644 --- a/core/cfg/option.h +++ b/core/cfg/option.h @@ -415,6 +415,8 @@ class RendererOption : public Option { RenderType::DirectX11 #elif defined(USE_DX9) RenderType::DirectX9 +#elif defined(USE_METAL) + RenderType::Metal #elif !defined(USE_OPENGL) RenderType::Vulkan #else diff --git a/core/deps/imgui/backends/imgui_impl_metal.h b/core/deps/imgui/backends/imgui_impl_metal.h new file mode 100644 index 0000000000..351c2eff73 --- /dev/null +++ b/core/deps/imgui/backends/imgui_impl_metal.h @@ -0,0 +1,74 @@ +// dear imgui: Renderer Backend for Metal +// This needs to be used along with a Platform Backend (e.g. OSX) + +// Implemented features: +// [X] Renderer: User texture binding. Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID! +// [X] Renderer: Large meshes support (64k+ vertices) even with 16-bit indices (ImGuiBackendFlags_RendererHasVtxOffset). + +// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. +// Learn about Dear ImGui: +// - FAQ https://dearimgui.com/faq +// - Getting Started https://dearimgui.com/getting-started +// - Documentation https://dearimgui.com/docs (same as your local docs/ folder). 
+// - Introduction, links and more at the top of imgui.cpp
+
+#include "imgui.h"      // IMGUI_IMPL_API
+#ifndef IMGUI_DISABLE
+
+//-----------------------------------------------------------------------------
+// ObjC API
+//-----------------------------------------------------------------------------
+
+#ifdef __OBJC__
+
+@class MTLRenderPassDescriptor;
+@protocol MTLDevice, MTLCommandBuffer, MTLRenderCommandEncoder;
+
+// Follow "Getting Started" link and check examples/ folder to learn about using backends!
+IMGUI_IMPL_API bool ImGui_ImplMetal_Init(id<MTLDevice> device);
+IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown();
+IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor);
+IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData,
+                                                   id<MTLCommandBuffer> commandBuffer,
+                                                   id<MTLRenderCommandEncoder> commandEncoder);
+
+// Called by Init/NewFrame/Shutdown
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(id<MTLDevice> device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture();
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(id<MTLDevice> device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects();
+
+#endif
+
+//-----------------------------------------------------------------------------
+// C++ API
+//-----------------------------------------------------------------------------
+
+// Enable Metal C++ binding support with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file
+// More info about using Metal from C++: https://developer.apple.com/metal/cpp/
+
+#ifdef IMGUI_IMPL_METAL_CPP
+#include <Metal/Metal.hpp>
+#ifndef __OBJC__
+
+// Follow "Getting Started" link and check examples/ folder to learn about using backends!
+IMGUI_IMPL_API bool ImGui_ImplMetal_Init(MTL::Device* device);
+IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown();
+IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor);
+IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data,
+                                                   MTL::CommandBuffer* commandBuffer,
+                                                   MTL::RenderCommandEncoder* commandEncoder);
+
+// Called by Init/NewFrame/Shutdown
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture();
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects();
+
+#endif
+#endif
+
+//-----------------------------------------------------------------------------
+
+#endif // #ifndef IMGUI_DISABLE
diff --git a/core/deps/imgui/backends/imgui_impl_metal.mm b/core/deps/imgui/backends/imgui_impl_metal.mm
new file mode 100644
index 0000000000..5680dea9d4
--- /dev/null
+++ b/core/deps/imgui/backends/imgui_impl_metal.mm
@@ -0,0 +1,592 @@
+// dear imgui: Renderer Backend for Metal
+// This needs to be used along with a Platform Backend (e.g. OSX)
+
+// Implemented features:
+//  [X] Renderer: User texture binding. Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID!
+//  [X] Renderer: Large meshes support (64k+ vertices) even with 16-bit indices (ImGuiBackendFlags_RendererHasVtxOffset).
+
+// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
+// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
+// Learn about Dear ImGui:
+// - FAQ                  https://dearimgui.com/faq
+// - Getting Started      https://dearimgui.com/getting-started
+// - Documentation        https://dearimgui.com/docs (same as your local docs/ folder).
+// - Introduction, links and more at the top of imgui.cpp + +// CHANGELOG +// (minor and older changes stripped away, please see git history for details) +// 2022-08-23: Metal: Update deprecated property 'sampleCount'->'rasterSampleCount'. +// 2022-07-05: Metal: Add dispatch synchronization. +// 2022-06-30: Metal: Use __bridge for ARC based systems. +// 2022-06-01: Metal: Fixed null dereference on exit inside command buffer completion handler. +// 2022-04-27: Misc: Store backend data in a per-context struct, allowing to use this backend with multiple contexts. +// 2022-01-03: Metal: Ignore ImDrawCmd where ElemCount == 0 (very rare but can technically be manufactured by user code). +// 2021-12-30: Metal: Added Metal C++ support. Enable with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file. +// 2021-08-24: Metal: Fixed a crash when clipping rect larger than framebuffer is submitted. (#4464) +// 2021-05-19: Metal: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). (will become a requirement) +// 2021-02-18: Metal: Change blending equation to preserve alpha in output buffer. +// 2021-01-25: Metal: Fixed texture storage mode when building on Mac Catalyst. +// 2019-05-29: Metal: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. +// 2019-04-30: Metal: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. +// 2019-02-11: Metal: Projecting clipping rectangles correctly using draw_data->FramebufferScale to allow multi-viewports for retina display. +// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. +// 2018-07-05: Metal: Added new Metal backend implementation. 
+
+#include "imgui.h"
+#ifndef IMGUI_DISABLE
+#include "imgui_impl_metal.h"
+#import <time.h>
+#import <Metal/Metal.h>
+
+#pragma mark - Support classes
+
+// A wrapper around a MTLBuffer object that knows the last time it was reused.
+// lastReuseTime is expressed in seconds, as returned by GetMachAbsoluteTimeInSeconds().
+@interface MetalBuffer : NSObject
+@property (nonatomic, strong) id<MTLBuffer> buffer;
+@property (nonatomic, assign) double lastReuseTime;
+- (instancetype)initWithBuffer:(id<MTLBuffer>)buffer;
+@end
+
+// An object that encapsulates the data necessary to uniquely identify a
+// render pipeline state. These are used as cache keys, which is why the class
+// adopts NSCopying (NSDictionary copies its keys) and overrides -hash/-isEqual:.
+@interface FramebufferDescriptor : NSObject<NSCopying>
+@property (nonatomic, assign) unsigned long sampleCount;
+@property (nonatomic, assign) MTLPixelFormat colorPixelFormat;
+@property (nonatomic, assign) MTLPixelFormat depthPixelFormat;
+@property (nonatomic, assign) MTLPixelFormat stencilPixelFormat;
+- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor;
+@end
+
+// A singleton that stores long-lived objects that are needed by the Metal
+// renderer backend. Stores the render pipeline state cache and the default
+// font texture, and manages the reusable buffer cache.
+@interface MetalContext : NSObject
+@property (nonatomic, strong) id<MTLDevice> device;
+@property (nonatomic, strong) id<MTLDepthStencilState> depthStencilState;
+@property (nonatomic, strong) FramebufferDescriptor* framebufferDescriptor; // framebuffer descriptor for current frame; transient
+@property (nonatomic, strong) NSMutableDictionary* renderPipelineStateCache; // pipeline cache; keyed on framebuffer descriptors
+@property (nonatomic, strong, nullable) id<MTLTexture> fontTexture;
+@property (nonatomic, strong) NSMutableArray<MetalBuffer*>* bufferCache;
+@property (nonatomic, assign) double lastBufferCachePurge;
+- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id<MTLDevice>)device;
+- (id<MTLRenderPipelineState>)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id<MTLDevice>)device;
+@end
+
+// Per-ImGui-context backend state, stored in io.BackendRendererUserData.
+struct ImGui_ImplMetal_Data
+{
+    MetalContext* SharedMetalContext;
+
+    ImGui_ImplMetal_Data() { memset((void*)this, 0, sizeof(*this)); }
+};
+
+// Returns the backend data of the current ImGui context, or nullptr when there is no current context.
+static ImGui_ImplMetal_Data* ImGui_ImplMetal_GetBackendData() { return ImGui::GetCurrentContext() ? (ImGui_ImplMetal_Data*)ImGui::GetIO().BackendRendererUserData : nullptr; }
+static void ImGui_ImplMetal_DestroyBackendData() { IM_DELETE(ImGui_ImplMetal_GetBackendData()); }
+
+// CLOCK_UPTIME_RAW converted from nanoseconds to (fractional) seconds.
+static inline CFTimeInterval GetMachAbsoluteTimeInSeconds() { return (CFTimeInterval)(double)(clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / 1e9); }
+
+#ifdef IMGUI_IMPL_METAL_CPP
+
+#pragma mark - Dear ImGui Metal C++ Backend API
+
+// The metal-cpp overloads below only bridge their arguments to the Objective-C
+// types and forward to the Objective-C entry points.
+
+bool ImGui_ImplMetal_Init(MTL::Device* device)
+{
+    return ImGui_ImplMetal_Init((__bridge id<MTLDevice>)(device));
+}
+
+void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor)
+{
+    ImGui_ImplMetal_NewFrame((__bridge MTLRenderPassDescriptor*)(renderPassDescriptor));
+}
+
+void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data,
+                                    MTL::CommandBuffer* commandBuffer,
+                                    MTL::RenderCommandEncoder* commandEncoder)
+{
+    ImGui_ImplMetal_RenderDrawData(draw_data,
+                                   (__bridge id<MTLCommandBuffer>)(commandBuffer),
+                                   (__bridge id<MTLRenderCommandEncoder>)(commandEncoder));
+
+}
+
+bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device)
+{
+    return ImGui_ImplMetal_CreateFontsTexture((__bridge id<MTLDevice>)(device));
+}
+
+bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device)
+{
+    return ImGui_ImplMetal_CreateDeviceObjects((__bridge id<MTLDevice>)(device));
+}
+
+#endif // #ifdef IMGUI_IMPL_METAL_CPP
+
+#pragma mark - Dear ImGui Metal Backend API
+
+// Allocates the backend data for the current ImGui context and remembers the device.
+// Device objects (depth-stencil state, font texture) are created lazily by NewFrame.
+bool ImGui_ImplMetal_Init(id<MTLDevice> device)
+{
+    ImGuiIO& io = ImGui::GetIO();
+    IMGUI_CHECKVERSION();
+    IM_ASSERT(io.BackendRendererUserData == nullptr && "Already initialized a renderer backend!");
+
+    ImGui_ImplMetal_Data* bd = IM_NEW(ImGui_ImplMetal_Data)();
+    io.BackendRendererUserData = (void*)bd;
+    io.BackendRendererName = "imgui_impl_metal";
+    io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset;  // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes.
+
+    bd->SharedMetalContext = [[MetalContext alloc] init];
+    bd->SharedMetalContext.device = device;
+
+    return true;
+}
+
+// Releases device objects and the backend data, then clears the fields Init set on ImGuiIO.
+void ImGui_ImplMetal_Shutdown()
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    IM_ASSERT(bd != nullptr && "No renderer backend to shutdown, or already shutdown?");
+    ImGui_ImplMetal_DestroyDeviceObjects();
+    ImGui_ImplMetal_DestroyBackendData();
+
+    ImGuiIO& io = ImGui::GetIO();
+    io.BackendRendererName = nullptr;
+    io.BackendRendererUserData = nullptr;
+    io.BackendFlags &= ~ImGuiBackendFlags_RendererHasVtxOffset;
+}
+
+// Records the framebuffer configuration of the pass that will be rendered this frame
+// (used as the pipeline cache key) and creates the device objects on first use.
+void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor)
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    IM_ASSERT(bd != nullptr && "Context or backend not initialized! Did you call ImGui_ImplMetal_Init()?");
+    bd->SharedMetalContext.framebufferDescriptor = [[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor];
+
+    if (bd->SharedMetalContext.depthStencilState == nil)
+        ImGui_ImplMetal_CreateDeviceObjects(bd->SharedMetalContext.device);
+}
+
+// Binds everything the ImGui draw calls rely on: cull mode, depth-stencil state, viewport,
+// projection matrix (vertex buffer index 1), pipeline state and vertex buffer (index 0).
+static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, id<MTLCommandBuffer> commandBuffer,
+    id<MTLRenderCommandEncoder> commandEncoder, id<MTLRenderPipelineState> renderPipelineState,
+    MetalBuffer* vertexBuffer, size_t vertexBufferOffset)
+{
+    IM_UNUSED(commandBuffer);
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    [commandEncoder setCullMode:MTLCullModeNone];
+    [commandEncoder setDepthStencilState:bd->SharedMetalContext.depthStencilState];
+
+    // Setup viewport, orthographic projection matrix
+    // Our visible imgui space lies from draw_data->DisplayPos (top left) to
+    // draw_data->DisplayPos+draw_data->DisplaySize (bottom right). DisplayMin is typically (0,0) for single viewport apps.
+    MTLViewport viewport =
+    {
+        .originX = 0.0,
+        .originY = 0.0,
+        .width = (double)(drawData->DisplaySize.x * drawData->FramebufferScale.x),
+        .height = (double)(drawData->DisplaySize.y * drawData->FramebufferScale.y),
+        .znear = 0.0,
+        .zfar = 1.0
+    };
+    [commandEncoder setViewport:viewport];
+
+    float L = drawData->DisplayPos.x;
+    float R = drawData->DisplayPos.x + drawData->DisplaySize.x;
+    float T = drawData->DisplayPos.y;
+    float B = drawData->DisplayPos.y + drawData->DisplaySize.y;
+    float N = (float)viewport.znear;
+    float F = (float)viewport.zfar;
+    const float ortho_projection[4][4] =
+    {
+        { 2.0f/(R-L),   0.0f,           0.0f,   0.0f },
+        { 0.0f,         2.0f/(T-B),     0.0f,   0.0f },
+        { 0.0f,         0.0f,        1/(F-N),   0.0f },
+        { (R+L)/(L-R),  (T+B)/(B-T), N/(F-N),   1.0f },
+    };
+    [commandEncoder setVertexBytes:&ortho_projection length:sizeof(ortho_projection) atIndex:1];
+
+    [commandEncoder setRenderPipelineState:renderPipelineState];
+
+    [commandEncoder setVertexBuffer:vertexBuffer.buffer offset:0 atIndex:0];
+    [commandEncoder setVertexBufferOffset:vertexBufferOffset atIndex:0];
+}
+
+// Metal Render function.
+// Encodes all ImGui draw lists into commandEncoder. Vertex/index data is copied into buffers
+// taken from the reusable buffer cache; they are handed back to the cache from the command
+// buffer's completion handler. Nothing is encoded when no pipeline state can be created.
+void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData, id<MTLCommandBuffer> commandBuffer, id<MTLRenderCommandEncoder> commandEncoder)
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    MetalContext* ctx = bd->SharedMetalContext;
+
+    // Avoid rendering when minimized, scale coordinates for retina displays (screen coordinates != framebuffer coordinates)
+    int fb_width = (int)(drawData->DisplaySize.x * drawData->FramebufferScale.x);
+    int fb_height = (int)(drawData->DisplaySize.y * drawData->FramebufferScale.y);
+    if (fb_width <= 0 || fb_height <= 0 || drawData->CmdListsCount == 0)
+        return;
+
+    // Try to retrieve a render pipeline state that is compatible with the framebuffer config for this frame
+    // The hit rate for this cache should be very near 100%.
+    id<MTLRenderPipelineState> renderPipelineState = ctx.renderPipelineStateCache[ctx.framebufferDescriptor];
+    if (renderPipelineState == nil)
+    {
+        // No luck; make a new render pipeline state
+        renderPipelineState = [ctx renderPipelineStateForFramebufferDescriptor:ctx.framebufferDescriptor device:commandBuffer.device];
+
+        // Creation failed (the reason has been logged): skip this frame's UI rather than
+        // binding a nil pipeline state to the encoder.
+        if (renderPipelineState == nil)
+            return;
+
+        // Cache render pipeline state for later reuse
+        ctx.renderPipelineStateCache[ctx.framebufferDescriptor] = renderPipelineState;
+    }
+
+    size_t vertexBufferLength = (size_t)drawData->TotalVtxCount * sizeof(ImDrawVert);
+    size_t indexBufferLength = (size_t)drawData->TotalIdxCount * sizeof(ImDrawIdx);
+    MetalBuffer* vertexBuffer = [ctx dequeueReusableBufferOfLength:vertexBufferLength device:commandBuffer.device];
+    MetalBuffer* indexBuffer = [ctx dequeueReusableBufferOfLength:indexBufferLength device:commandBuffer.device];
+
+    ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, 0);
+
+    // Will project scissor/clipping rectangles into framebuffer space
+    ImVec2 clip_off = drawData->DisplayPos;         // (0,0) unless using multi-viewports
+    ImVec2 clip_scale = drawData->FramebufferScale; // (1,1) unless using retina display which are often (2,2)
+
+    // Render command lists
+    size_t vertexBufferOffset = 0;
+    size_t indexBufferOffset = 0;
+    for (int n = 0; n < drawData->CmdListsCount; n++)
+    {
+        const ImDrawList* draw_list = drawData->CmdLists[n];
+
+        memcpy((char*)vertexBuffer.buffer.contents + vertexBufferOffset, draw_list->VtxBuffer.Data, (size_t)draw_list->VtxBuffer.Size * sizeof(ImDrawVert));
+        memcpy((char*)indexBuffer.buffer.contents + indexBufferOffset, draw_list->IdxBuffer.Data, (size_t)draw_list->IdxBuffer.Size * sizeof(ImDrawIdx));
+
+        for (int cmd_i = 0; cmd_i < draw_list->CmdBuffer.Size; cmd_i++)
+        {
+            const ImDrawCmd* pcmd = &draw_list->CmdBuffer[cmd_i];
+            if (pcmd->UserCallback)
+            {
+                // User callback, registered via ImDrawList::AddCallback()
+                // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.)
+                if (pcmd->UserCallback == ImDrawCallback_ResetRenderState)
+                    ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, vertexBufferOffset);
+                else
+                    pcmd->UserCallback(draw_list, pcmd);
+            }
+            else
+            {
+                // Project scissor/clipping rectangles into framebuffer space
+                ImVec2 clip_min((pcmd->ClipRect.x - clip_off.x) * clip_scale.x, (pcmd->ClipRect.y - clip_off.y) * clip_scale.y);
+                ImVec2 clip_max((pcmd->ClipRect.z - clip_off.x) * clip_scale.x, (pcmd->ClipRect.w - clip_off.y) * clip_scale.y);
+
+                // Clamp to viewport as setScissorRect() won't accept values that are off bounds
+                if (clip_min.x < 0.0f) { clip_min.x = 0.0f; }
+                if (clip_min.y < 0.0f) { clip_min.y = 0.0f; }
+                if (clip_max.x > fb_width) { clip_max.x = (float)fb_width; }
+                if (clip_max.y > fb_height) { clip_max.y = (float)fb_height; }
+                if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y)
+                    continue;
+                if (pcmd->ElemCount == 0) // drawIndexedPrimitives() validation doesn't accept this
+                    continue;
+
+                // Apply scissor/clipping rectangle
+                MTLScissorRect scissorRect =
+                {
+                    .x = NSUInteger(clip_min.x),
+                    .y = NSUInteger(clip_min.y),
+                    .width = NSUInteger(clip_max.x - clip_min.x),
+                    .height = NSUInteger(clip_max.y - clip_min.y)
+                };
+                [commandEncoder setScissorRect:scissorRect];
+
+                // Bind texture, Draw
+                if (ImTextureID tex_id = pcmd->GetTexID())
+                    [commandEncoder setFragmentTexture:(__bridge id<MTLTexture>)(void*)(intptr_t)(tex_id) atIndex:0];
+
+                [commandEncoder setVertexBufferOffset:(vertexBufferOffset + pcmd->VtxOffset * sizeof(ImDrawVert)) atIndex:0];
+                [commandEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+                                           indexCount:pcmd->ElemCount
+                                            indexType:sizeof(ImDrawIdx) == 2 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
+                                          indexBuffer:indexBuffer.buffer
+                                    indexBufferOffset:indexBufferOffset + pcmd->IdxOffset * sizeof(ImDrawIdx)];
+            }
+        }
+
+        vertexBufferOffset += (size_t)draw_list->VtxBuffer.Size * sizeof(ImDrawVert);
+        indexBufferOffset += (size_t)draw_list->IdxBuffer.Size * sizeof(ImDrawIdx);
+    }
+
+    // Hand both buffers back to the cache once the GPU has finished with this command buffer.
+    // The backend data is looked up again inside the block because it may be gone by then.
+    [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>)
+    {
+        dispatch_async(dispatch_get_main_queue(), ^{
+            ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+            if (bd != nullptr)
+            {
+                @synchronized(bd->SharedMetalContext.bufferCache)
+                {
+                    [bd->SharedMetalContext.bufferCache addObject:vertexBuffer];
+                    [bd->SharedMetalContext.bufferCache addObject:indexBuffer];
+                }
+            }
+        });
+    }];
+}
+
+// Uploads the font atlas as an RGBA8 texture and registers it as the atlas' ImTextureID.
+// Returns false when the texture could not be created.
+bool ImGui_ImplMetal_CreateFontsTexture(id<MTLDevice> device)
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    ImGuiIO& io = ImGui::GetIO();
+
+    // We are retrieving and uploading the font atlas as a 4-channels RGBA texture here.
+    // In theory we could call GetTexDataAsAlpha8() and upload a 1-channel texture to save on memory access bandwidth.
+    // However, using a shader designed for 1-channel texture would make it less obvious to use the ImTextureID facility to render users own textures.
+    // You can make that change in your implementation.
+    unsigned char* pixels;
+    int width, height;
+    io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);
+    MTLTextureDescriptor* textureDescriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+                                                                                                 width:(NSUInteger)width
+                                                                                                height:(NSUInteger)height
+                                                                                             mipmapped:NO];
+    textureDescriptor.usage = MTLTextureUsageShaderRead;
+#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
+    textureDescriptor.storageMode = MTLStorageModeManaged;
+#else
+    textureDescriptor.storageMode = MTLStorageModeShared;
+#endif
+    id<MTLTexture> texture = [device newTextureWithDescriptor:textureDescriptor];
+    [texture replaceRegion:MTLRegionMake2D(0, 0, (NSUInteger)width, (NSUInteger)height) mipmapLevel:0 withBytes:pixels bytesPerRow:(NSUInteger)width * 4];
+    bd->SharedMetalContext.fontTexture = texture;
+    io.Fonts->SetTexID((ImTextureID)(intptr_t)(__bridge void*)bd->SharedMetalContext.fontTexture); // ImTextureID == ImU64
+
+    return (bd->SharedMetalContext.fontTexture != nil);
+}
+
+// Drops the font texture and clears the atlas' texture id.
+void ImGui_ImplMetal_DestroyFontsTexture()
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    ImGuiIO& io = ImGui::GetIO();
+    bd->SharedMetalContext.fontTexture = nil;
+    io.Fonts->SetTexID(0);
+}
+
+// Creates the depth-stencil state (depth test always passes, no depth writes) and the font texture.
+// Returns false if either object could not be created.
+bool ImGui_ImplMetal_CreateDeviceObjects(id<MTLDevice> device)
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    MTLDepthStencilDescriptor* depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init];
+    depthStencilDescriptor.depthWriteEnabled = NO;
+    depthStencilDescriptor.depthCompareFunction = MTLCompareFunctionAlways;
+    bd->SharedMetalContext.depthStencilState = [device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
+    const bool fontsCreated = ImGui_ImplMetal_CreateFontsTexture(device);
+
+    return fontsCreated && bd->SharedMetalContext.depthStencilState != nil;
+}
+
+// Releases the font texture and every cached pipeline state.
+void ImGui_ImplMetal_DestroyDeviceObjects()
+{
+    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
+    ImGui_ImplMetal_DestroyFontsTexture();
+    [bd->SharedMetalContext.renderPipelineStateCache removeAllObjects];
+}
+
+#pragma mark - MetalBuffer implementation
+
+@implementation MetalBuffer
+// Wraps `buffer` and stamps it with the current time as its last reuse time.
+- (instancetype)initWithBuffer:(id<MTLBuffer>)buffer
+{
+    if ((self = [super init]))
+    {
+        _buffer = buffer;
+        _lastReuseTime = GetMachAbsoluteTimeInSeconds();
+    }
+    return self;
+}
+@end
+
+#pragma mark - FramebufferDescriptor implementation
+
+@implementation FramebufferDescriptor
+// Captures sample count and color/depth/stencil pixel formats from the pass' attachments.
+// Only color attachment 0 is considered.
+- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor
+{
+    if ((self = [super init]))
+    {
+        _sampleCount = renderPassDescriptor.colorAttachments[0].texture.sampleCount;
+        _colorPixelFormat = renderPassDescriptor.colorAttachments[0].texture.pixelFormat;
+        _depthPixelFormat = renderPassDescriptor.depthAttachment.texture.pixelFormat;
+        _stencilPixelFormat = renderPassDescriptor.stencilAttachment.texture.pixelFormat;
+    }
+    return self;
+}
+
+- (nonnull id)copyWithZone:(nullable NSZone*)zone
+{
+    FramebufferDescriptor* copy = [[FramebufferDescriptor allocWithZone:zone] init];
+    copy.sampleCount = self.sampleCount;
+    copy.colorPixelFormat = self.colorPixelFormat;
+    copy.depthPixelFormat = self.depthPixelFormat;
+    copy.stencilPixelFormat = self.stencilPixelFormat;
+    return copy;
+}
+
+// Packs the low bits of each field into one word. Values that differ only in the
+// masked-off bits share a hash; -isEqual: still tells them apart.
+- (NSUInteger)hash
+{
+    NSUInteger sc = _sampleCount & 0x3;
+    NSUInteger cf = _colorPixelFormat & 0x3FF;
+    NSUInteger df = _depthPixelFormat & 0x3FF;
+    NSUInteger sf = _stencilPixelFormat & 0x3FF;
+    NSUInteger hash = (sf << 22) | (df << 12) | (cf << 2) | sc;
+    return hash;
+}
+
+- (BOOL)isEqual:(id)object
+{
+    FramebufferDescriptor* other = object;
+    if (![other isKindOfClass:[FramebufferDescriptor class]])
+        return NO;
+    return other.sampleCount == self.sampleCount &&
+    other.colorPixelFormat == self.colorPixelFormat &&
+    other.depthPixelFormat == self.depthPixelFormat &&
+    other.stencilPixelFormat == self.stencilPixelFormat;
+}
+
+@end
+
+#pragma mark - MetalContext implementation
+
+@implementation MetalContext
+- (instancetype)init
+{
+    if ((self = [super init]))
+    {
+        // Assign the ivars directly: accessors should not be called on self during init.
+        _renderPipelineStateCache = [NSMutableDictionary dictionary];
+        _bufferCache = [NSMutableArray array];
+        _lastBufferCachePurge = GetMachAbsoluteTimeInSeconds();
+    }
+    return self;
+}
+
+// Returns a buffer of at least `length` bytes: the least recently reused cached buffer that is
+// large enough, or a newly allocated shared-storage buffer when none fits. About once per
+// second, cached buffers that have not been reused since the previous purge are dropped.
+- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id<MTLDevice>)device
+{
+    // Keep the fractional part: an integer timestamp would round reuse times down to the
+    // second and make recently used buffers look stale to the purge below.
+    CFTimeInterval now = GetMachAbsoluteTimeInSeconds();
+
+    @synchronized(self.bufferCache)
+    {
+        // Purge old buffers that haven't been useful for a while
+        if (now - self.lastBufferCachePurge > 1.0)
+        {
+            NSMutableArray<MetalBuffer*>* survivors = [NSMutableArray array];
+            for (MetalBuffer* candidate in self.bufferCache)
+                if (candidate.lastReuseTime > self.lastBufferCachePurge)
+                    [survivors addObject:candidate];
+            // Update the array in place: bufferCache is also the @synchronized token, so
+            // replacing the object would leave other threads locking on a different instance.
+            [self.bufferCache setArray:survivors];
+            self.lastBufferCachePurge = now;
+        }
+
+        // See if we have a buffer we can reuse
+        MetalBuffer* bestCandidate = nil;
+        for (MetalBuffer* candidate in self.bufferCache)
+            if (candidate.buffer.length >= length && (bestCandidate == nil || bestCandidate.lastReuseTime > candidate.lastReuseTime))
+                bestCandidate = candidate;
+
+        if (bestCandidate != nil)
+        {
+            [self.bufferCache removeObject:bestCandidate];
+            bestCandidate.lastReuseTime = now;
+            return bestCandidate;
+        }
+    }
+
+    // No luck; make a new buffer
+    id<MTLBuffer> backing = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
+    return [[MetalBuffer alloc] initWithBuffer:backing];
+}
+
+// Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling.
+// Compiles the ImGui shaders from source and builds a pipeline state whose sample count and
+// attachment formats match `descriptor`. Returns nil (after logging) on failure.
+- (id<MTLRenderPipelineState>)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id<MTLDevice>)device
+{
+    NSError* error = nil;
+
+    NSString* shaderSource = @""
+    "#include <metal_stdlib>\n"
+    "using namespace metal;\n"
+    "\n"
+    "struct Uniforms {\n"
+    "    float4x4 projectionMatrix;\n"
+    "};\n"
+    "\n"
+    "struct VertexIn {\n"
+    "    float2 position  [[attribute(0)]];\n"
+    "    float2 texCoords [[attribute(1)]];\n"
+    "    uchar4 color     [[attribute(2)]];\n"
+    "};\n"
+    "\n"
+    "struct VertexOut {\n"
+    "    float4 position [[position]];\n"
+    "    float2 texCoords;\n"
+    "    float4 color;\n"
+    "};\n"
+    "\n"
+    "vertex VertexOut vertex_main(VertexIn in                 [[stage_in]],\n"
+    "                             constant Uniforms &uniforms [[buffer(1)]]) {\n"
+    "    VertexOut out;\n"
+    "    out.position = uniforms.projectionMatrix * float4(in.position, 0, 1);\n"
+    "    out.texCoords = in.texCoords;\n"
+    "    out.color = float4(in.color) / float4(255.0);\n"
+    "    return out;\n"
+    "}\n"
+    "\n"
+    "fragment half4 fragment_main(VertexOut in [[stage_in]],\n"
+    "                             texture2d<half, access::sample> texture [[texture(0)]]) {\n"
+    "    constexpr sampler linearSampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);\n"
+    "    half4 texColor = texture.sample(linearSampler, in.texCoords);\n"
+    "    return half4(in.color) * texColor;\n"
+    "}\n";
+
+    id<MTLLibrary> library = [device newLibraryWithSource:shaderSource options:nil error:&error];
+    if (library == nil)
+    {
+        NSLog(@"Error: failed to create Metal library: %@", error);
+        return nil;
+    }
+
+    id<MTLFunction> vertexFunction = [library newFunctionWithName:@"vertex_main"];
+    id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"fragment_main"];
+
+    if (vertexFunction == nil || fragmentFunction == nil)
+    {
+        NSLog(@"Error: failed to find Metal shader functions in library: %@", error);
+        return nil;
+    }
+
+    // Vertex layout mirrors ImDrawVert: position, texture coordinates, packed RGBA color.
+    MTLVertexDescriptor* vertexDescriptor = [MTLVertexDescriptor vertexDescriptor];
+    vertexDescriptor.attributes[0].offset = offsetof(ImDrawVert, pos);
+    vertexDescriptor.attributes[0].format = MTLVertexFormatFloat2; // position
+    vertexDescriptor.attributes[0].bufferIndex = 0;
+    vertexDescriptor.attributes[1].offset = offsetof(ImDrawVert, uv);
+    vertexDescriptor.attributes[1].format = MTLVertexFormatFloat2; // texCoords
+    vertexDescriptor.attributes[1].bufferIndex = 0;
+    vertexDescriptor.attributes[2].offset = offsetof(ImDrawVert, col);
+    vertexDescriptor.attributes[2].format = MTLVertexFormatUChar4; // color
+    vertexDescriptor.attributes[2].bufferIndex = 0;
+    vertexDescriptor.layouts[0].stepRate = 1;
+    vertexDescriptor.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
+    vertexDescriptor.layouts[0].stride = sizeof(ImDrawVert);
+
+    // Take the framebuffer configuration from the `descriptor` argument (the key the caller
+    // caches the result under), not from the context's per-frame descriptor.
+    MTLRenderPipelineDescriptor* pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
+    pipelineDescriptor.vertexFunction = vertexFunction;
+    pipelineDescriptor.fragmentFunction = fragmentFunction;
+    pipelineDescriptor.vertexDescriptor = vertexDescriptor;
+    pipelineDescriptor.rasterSampleCount = descriptor.sampleCount;
+    pipelineDescriptor.colorAttachments[0].pixelFormat = descriptor.colorPixelFormat;
+    pipelineDescriptor.colorAttachments[0].blendingEnabled = YES;
+    pipelineDescriptor.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd;
+    pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha;
+    pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
+    pipelineDescriptor.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd;
+    pipelineDescriptor.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne;
+    pipelineDescriptor.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
+    pipelineDescriptor.depthAttachmentPixelFormat = descriptor.depthPixelFormat;
+    pipelineDescriptor.stencilAttachmentPixelFormat = descriptor.stencilPixelFormat;
+
+    // Failure is signalled by the nil return value; the NSError only carries the details.
+    id<MTLRenderPipelineState> renderPipelineState = [device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error];
+    if (renderPipelineState == nil)
+        NSLog(@"Error: failed to create Metal pipeline state: %@", error);
+
+    return renderPipelineState;
+}
+
+@end
+
+//-----------------------------------------------------------------------------
+
+#endif // #ifndef IMGUI_DISABLE
diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp
index 7f669f3fd0..eb3afd4c0b 100644
--- a/core/hw/pvr/Renderer_if.cpp
+++ b/core/hw/pvr/Renderer_if.cpp
@@ -268,6 +268,7 @@ Renderer* rend_norend();
 Renderer* rend_Vulkan();
 Renderer* rend_OITVulkan();
 Renderer* rend_DirectX9();
+Renderer* rend_Metal();
 Renderer* rend_DirectX11();
 Renderer* rend_OITDirectX11();
 
@@ -302,6 +303,11 @@ static void rend_create_renderer()
 		renderer = rend_DirectX9();
 		break;
 #endif
+#ifdef USE_METAL
+	case RenderType::Metal:
+		renderer = rend_Metal();
+		break;
+#endif
 #ifdef USE_DX11
 	case RenderType::DirectX11:
 		renderer = rend_DirectX11();
diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h
new file mode 100644
index 0000000000..5148729055
--- /dev/null
+++ b/core/rend/metal/metal_buffer.h
@@ -0,0 +1,118 @@
+/*
+    Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+#pragma once
+#include "types.h"
+#include <Metal/Metal.h>
+
+// CPU-visible Metal buffer of a fixed size with bounds-checked copy helpers.
+// The buffer is created in the constructor (see metal_buffer.mm) and released in the
+// destructor using manual retain/release, so an instance must not be copied.
+struct MetalBufferData
+{
+    MetalBufferData(u64 size);
+    ~MetalBufferData()
+    {
+        [buffer setPurgeableState:MTLPurgeableStateEmpty];
+        [buffer release];
+        buffer = nil;
+    }
+
+    // Copies `size` bytes from `data` into the buffer at byte offset `bufOffset`.
+    void upload(u32 size, const void *data, u32 bufOffset = 0) const
+    {
+        // Do the range check in 64 bits: bufOffset + size can wrap around in u32
+        // and would then wrongly pass the check.
+        verify((u64)bufOffset + size <= bufferSize);
+
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        memcpy(dataPtr, data, size);
+    }
+
+    // Copies `count` chunks back to back starting at `bufOffset`.
+    // A null entry in `data` skips sizes[i] bytes without writing (used for padding).
+    void upload(size_t count, const u32 *sizes, const void * const *data, u32 bufOffset = 0) const
+    {
+        // 64-bit accumulator so that the sum of the chunk sizes cannot wrap.
+        u64 totalSize = 0;
+        for (size_t i = 0; i < count; ++i)
+            totalSize += sizes[i];
+        verify((u64)bufOffset + totalSize <= bufferSize);
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        for (size_t i = 0; i < count; ++i)
+        {
+            if (data[i] != nullptr)
+                memcpy(dataPtr, data[i], sizes[i]);
+            dataPtr = (u8 *)dataPtr + sizes[i];
+        }
+    }
+
+    // Copies `size` bytes from the buffer at byte offset `bufOffset` into `data`.
+    void download(u32 size, void *data, u32 bufOffset = 0) const
+    {
+        verify((u64)bufOffset + size <= bufferSize);
+
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        memcpy(data, dataPtr, size);
+    }
+
+    id<MTLBuffer> buffer;
+    u64 bufferSize;
+};
+
+// Collects pointers to data chunks, padding each one to a 16-byte boundary, so that
+// they can be written into a MetalBufferData with a single upload() call.
+// The chunk pointers are stored, not copied: they must stay valid until upload().
+class BufferPacker
+{
+public:
+    BufferPacker();
+
+    u64 addUniform(const void *p, size_t size) {
+        return add(p, size);
+    }
+
+    u64 addStorage(const void *p, size_t size) {
+        return add(p, size);
+    }
+
+    // Appends a chunk and returns its (16-byte aligned) offset inside the packed data.
+    u64 add(const void *p, size_t size)
+    {
+        u32 padding = align(offset, 16);
+        if (padding != 0)
+        {
+            // A null chunk makes upload() skip the padding bytes.
+            chunks.push_back(nullptr);
+            chunkSizes.push_back(padding);
+            offset += padding;
+        }
+        u64 start = offset;
+        chunks.push_back(p);
+        chunkSizes.push_back((u32)size);
+        offset += size;
+
+        return start;
+    }
+
+    // Writes all collected chunks into `bufferData` starting at `bufOffset`.
+    void upload(MetalBufferData& bufferData, u32 bufOffset = 0)
+    {
+        if (!chunks.empty())
+            bufferData.upload(chunks.size(), chunkSizes.data(), chunks.data(), bufOffset);
+    }
+
+    // Total number of bytes packed so far, padding included.
+    u64 size() const {
+        return offset;
+    }
+
+private:
+    std::vector<const void *> chunks;
+    std::vector<u32> chunkSizes;
+    u64 offset = 0;
+
+    // Number of padding bytes needed to bring `offset` up to a multiple of `alignment`
+    // (`alignment` must be a power of two).
+    static inline u32 align(u64 offset, u32 alignment)
+    {
+        u32 pad = (u32)(offset & (alignment - 1));
+        return pad == 0 ? 0 : alignment - pad;
+    }
+};
\ No newline at end of file
diff --git a/core/rend/metal/metal_buffer.mm b/core/rend/metal/metal_buffer.mm
new file mode 100644
index 0000000000..7a1936259d
--- /dev/null
+++ b/core/rend/metal/metal_buffer.mm
@@ -0,0 +1,28 @@
+/*
+    Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "metal_buffer.h"
+
+#include "metal_context.h"
+
+// Allocates a shared-storage (CPU and GPU visible) buffer of `size` bytes on the
+// device owned by the current MetalContext.
+MetalBufferData::MetalBufferData(u64 size) : bufferSize(size) {
+    auto device = MetalContext::Instance()->GetDevice();
+
+    buffer = [device newBufferWithLength:size options:MTLResourceStorageModeShared];
+}
diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h
new file mode 100644
index 0000000000..65fea3ed2c
--- /dev/null
+++ b/core/rend/metal/metal_context.h
@@ -0,0 +1,64 @@
+/*
+Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+#pragma once
+#include <Metal/Metal.h>
+#include <QuartzCore/CAMetalLayer.h>
+
+#include "wsi/context.h"
+
+// Graphics context for the Metal backend: holds the device, the command queue, the
+// CAMetalLayer of the window and the command buffer currently being recorded.
+// A single instance exists at a time and is reachable through Instance().
+class MetalContext : public GraphicsContext
+{
+public:
+    MetalContext();
+    ~MetalContext() override;
+
+    bool init();
+    void term() override;
+
+    id<MTLDevice> GetDevice() const { return device; }
+    CAMetalLayer* GetLayer() const { return layer; }
+    id<MTLCommandQueue> GetQueue() const { return queue; }
+    // Command buffer currently being recorded; replaced after every commit.
+    id<MTLCommandBuffer> commandBuffer = nil;
+    void resize() override;
+    void Present();
+
+    std::string getDriverName() override {
+        // -UTF8String yields NULL when there is no device (messaging nil), and
+        // constructing a std::string from NULL is undefined behaviour.
+        const char *name = [[device name] UTF8String];
+        return name != nullptr ? name : "";
+    }
+
+    std::string getDriverVersion() override {
+        return "";
+    }
+
+    bool isAMD() override {
+        return false;
+    }
+
+    bool hasPerPixel() override {
+        return true;
+    }
+
+    static MetalContext* Instance() { return contextInstance; }
+private:
+    // Autorelease pool created by init() and drained by term().
+    NSAutoreleasePool *pool = nil;
+    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+    id<MTLCommandQueue> queue = nil;
+    // Layer of the SDL Metal view; stays nil when built without SDL.
+    CAMetalLayer* layer = nil;
+    static MetalContext* contextInstance;
+};
\ No newline at end of file
diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm
new file mode 100644
index 0000000000..a92db6a0a6
--- /dev/null
+++ b/core/rend/metal/metal_context.mm
@@ -0,0 +1,85 @@
+/*
+Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "metal_context.h"
+#include "metal_driver.h"
+#include "sdl/sdl.h"
+#include "ui/imgui_driver.h"
+
+MetalContext *MetalContext::contextInstance;
+
+// Recreates the window for Metal, attaches the device to its layer, creates the
+// command queue and first command buffer, and installs the ImGui Metal driver.
+// Returns false - after undoing any partial setup through term() - on failure.
+bool MetalContext::init() {
+    pool = [[NSAutoreleasePool alloc] init];
+    GraphicsContext::instance = this;
+
+#if defined(USE_SDL)
+    if (!sdl_recreate_window(SDL_WINDOW_METAL)) {
+        // Clean up like the other failure paths do, so that neither the pool nor
+        // the global instance pointer outlives a failed init().
+        term();
+        return false;
+    }
+
+    auto view = SDL_Metal_CreateView((SDL_Window *)window);
+
+    if (view == nullptr) {
+        term();
+        ERROR_LOG(RENDERER, "Failed to create SDL Metal View");
+        return false;
+    }
+
+    layer = static_cast<CAMetalLayer *>(SDL_Metal_GetLayer(view));
+#endif
+
+    if (!device) {
+        term();
+        NOTICE_LOG(RENDERER, "Metal Device is null.");
+        return false;
+    }
+
+    [layer setDevice:device];
+    queue = [device newCommandQueue];
+    commandBuffer = [queue commandBuffer];
+
+    NOTICE_LOG(RENDERER, "Created Metal view.");
+
+    imguiDriver = std::unique_ptr<ImGuiDriver>(new MetalDriver());
+    return true;
+}
+
+void MetalContext::resize() {
+
+}
+
+void MetalContext::Present() {
+
+}
+
+// Tears down what init() created. Safe to call more than once: init() calls it on
+// its failure paths and the owner of the context may call it again afterwards.
+void MetalContext::term() {
+    GraphicsContext::instance = nullptr;
+    imguiDriver.reset();
+    // The command buffer is autoreleased; drop the reference before draining.
+    commandBuffer = nil;
+    [pool release];
+    // Cleared so that a repeated term() messages nil instead of releasing the
+    // pool a second time.
+    pool = nil;
+}
+
+MetalContext::MetalContext() {
+    verify(contextInstance == nullptr);
+    contextInstance = this;
+}
+
+MetalContext::~MetalContext() {
+    verify(contextInstance == this);
+    contextInstance = nullptr;
+}
+
diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h
new file mode 100644
index 0000000000..d4c7f2b889
--- /dev/null
+++ b/core/rend/metal/metal_driver.h
@@ -0,0 +1,122 @@
+/*
+    Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+#pragma once
+#include "ui/imgui_driver.h"
+#include "imgui_impl_metal.h"
+#include "metal_context.h"
+#include <unordered_map>
+
+#include "metal_texture.h"
+
+// ImGui driver for the Metal backend. Each frame it acquires a drawable from the
+// context's layer, records the ImGui draw data into the context's command buffer and
+// schedules the drawable for presentation. It also owns the named UI textures.
+class MetalDriver final : public ImGuiDriver {
+public:
+    MetalDriver() {
+        ImGui_ImplMetal_Init(MetalContext::Instance()->GetDevice());
+    }
+
+    void reset() override
+    {
+        ImGuiDriver::reset();
+        ImGui_ImplMetal_Shutdown();
+    }
+
+    // Starts a UI frame: acquires the next drawable and opens a render pass that
+    // clears it to opaque black.
+    void newFrame() override {
+        MetalContext *context = MetalContext::Instance();
+        drawable = [context->GetLayer() nextDrawable];
+
+        MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init];
+
+        [descriptor setDefaultRasterSampleCount:1];
+
+        auto color = [descriptor colorAttachments][0];
+        [color setClearColor:MTLClearColorMake(0.f, 0.f, 0.f, 1.f)];
+        [color setTexture:[drawable texture]];
+        [color setLoadAction:MTLLoadActionClear];
+        [color setStoreAction:MTLStoreActionStore];
+
+        // nextDrawable returns nil when no drawable is available. Without a target
+        // texture no render pass can be opened, so the frame is skipped in
+        // renderDrawData().
+        if (drawable != nil)
+            commandEncoder = [context->commandBuffer renderCommandEncoderWithDescriptor:descriptor];
+        else
+            commandEncoder = nil;
+
+        ImGui_ImplMetal_NewFrame(descriptor);
+
+        [descriptor release];
+    }
+
+    // Records the ImGui draw data, ends the pass, schedules presentation, commits the
+    // command buffer and hands the context a fresh one for the next frame.
+    void renderDrawData(ImDrawData *drawData, bool gui_open) override {
+        if (commandEncoder == nil)
+            // newFrame() could not open a render pass: nothing to draw into.
+            return;
+
+        MetalContext *context = MetalContext::Instance();
+        id<MTLCommandBuffer> buffer = context->commandBuffer;
+
+        ImGui_ImplMetal_RenderDrawData(drawData, buffer, commandEncoder);
+
+        [commandEncoder endEncoding];
+        [buffer presentDrawable:drawable];
+        [buffer commit];
+
+        commandEncoder = nil;
+        drawable = nil;
+
+        context->commandBuffer = [context->GetQueue() commandBuffer];
+
+        if (gui_open)
+            frameRendered = true;
+    }
+
+    void present() override {
+        // Presentation is already scheduled by renderDrawData(); only the flag needs
+        // resetting. (The original `if (frameRendered)` had a commented-out line as
+        // its apparent body and therefore guarded exactly this assignment.)
+        frameRendered = false;
+    }
+
+    // Returns the Metal texture registered under `name`, or a null id if there is none.
+    ImTextureID getTexture(const std::string &name) override {
+        auto it = textures.find(name);
+        if (it != textures.end() && it->second.texture != nullptr)
+            // Same value updateTexture() hands out: the Metal texture object, not the
+            // address of the owning unique_ptr.
+            return it->second.texture->texture;
+
+        return ImTextureID{};
+    }
+
+    // Uploads 8888 pixel data as the texture `name`, replacing any previous one.
+    // NOTE(review): nearestSampling is not used here.
+    ImTextureID updateTexture(const std::string &name, const u8 *data, int width, int height, bool nearestSampling) override {
+        Texture texture(std::make_unique<MetalTexture>());
+        texture.texture->tex_type = TextureType::_8888;
+        texture.texture->UploadToGPU(width, height, data, false);
+
+        ImTextureID textureID = texture.texture->texture;
+
+        textures[name] = std::move(texture);
+
+        return textureID;
+    }
+
+    // Removes the texture `name`; unknown names are ignored.
+    void deleteTexture(const std::string &name) override {
+        auto it = textures.find(name);
+        if (it == textures.end())
+            // Dereferencing end() is undefined behaviour.
+            return;
+        if (it->second.texture != nullptr)
+            [it->second.texture->texture setPurgeableState:MTLPurgeableStateEmpty];
+        textures.erase(it);
+    }
+
+private:
+    struct Texture {
+        Texture() = default;
+        Texture(std::unique_ptr<MetalTexture>&& texture) : texture(std::move(texture)) {}
+
+        std::unique_ptr<MetalTexture> texture;
+    };
+
+    bool frameRendered = false;
+    // Render pass opened by newFrame(); nil between frames or when no drawable was available.
+    id<MTLRenderCommandEncoder> commandEncoder = nil;
+    // CAMetalDrawable is a protocol, so it is referenced through id<>.
+    id<CAMetalDrawable> drawable = nil;
+    std::unordered_map<std::string, Texture> textures;
+};
diff --git a/core/rend/metal/metal_pipeline.h b/core/rend/metal/metal_pipeline.h
new file mode 100644
index 0000000000..f52525c12a
--- /dev/null
+++ b/core/rend/metal/metal_pipeline.h
@@ -0,0 +1,274 @@
+/*
+    Copyright 2024 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+*/
+#pragma once
+#include "types.h"
+#include <Metal/Metal.h>
+#include <map>
+
+#include "cfg/option.h"
+#include "hw/pvr/ta_ctx.h"
+
+class MetalRenderer;
+
+enum class ModVolMode { Xor, Or, Inclusion, Exclusion, Final };
+
+// Lazily creates and caches Metal render pipeline states and depth-stencil states.
+// Every Get*() computes a hash of the parameters that influence the state, returns
+// the cached object when present and otherwise creates and caches it first.
+// The cached objects are owned by this class (manual retain/release) until term().
+class MetalPipelineManager
+{
+public:
+    explicit MetalPipelineManager(MetalRenderer *renderer);
+    virtual ~MetalPipelineManager() = default;
+
+    // Releases and forgets every cached state; they are re-created on demand.
+    void term()
+    {
+        releaseAll(pipelines);
+        releaseAll(modVolPipelines);
+        releaseAll(depthPassPipelines);
+        releaseAll(depthStencilStates);
+        releaseAll(modVolStencilStates);
+        releaseAll(depthPassDepthStencilStates);
+        [blitPassPipeline release];
+        blitPassPipeline = nil;
+    }
+
+    id<MTLRenderPipelineState> GetBlitPassPipeline() {
+        if (blitPassPipeline != nullptr)
+            return blitPassPipeline;
+
+        CreateBlitPassPipeline();
+
+        return blitPassPipeline;
+    }
+
+    id<MTLRenderPipelineState> GetDepthPassPipeline(int cullMode, bool naomi2)
+    {
+        u32 pipehash = hash(cullMode, naomi2);
+        const auto &pipeline = depthPassPipelines.find(pipehash);
+        if (pipeline != depthPassPipelines.end() && pipeline->second != nullptr)
+            return pipeline->second;
+        CreateDepthPassPipeline(cullMode, naomi2);
+
+        return depthPassPipelines[pipehash];
+    }
+
+    id<MTLRenderPipelineState> GetModifierVolumePipeline(ModVolMode mode, int cullMode, bool naomi2)
+    {
+        u32 pipehash = hash(mode, cullMode, naomi2);
+        const auto &pipeline = modVolPipelines.find(pipehash);
+        if (pipeline != modVolPipelines.end() && pipeline->second != nullptr)
+            return pipeline->second;
+        CreateModVolPipeline(mode, cullMode, naomi2);
+
+        return modVolPipelines[pipehash];
+    }
+
+    id<MTLRenderPipelineState> GetPipeline(u32 listType, bool sortTriangles, const PolyParam& pp, int gpuPalette, bool dithering)
+    {
+        u64 pipehash = hash(listType, sortTriangles, &pp, gpuPalette, dithering);
+        const auto &pipeline = pipelines.find(pipehash);
+        if (pipeline != pipelines.end() && pipeline->second != nullptr)
+            return pipeline->second;
+        CreatePipeline(listType, sortTriangles, pp, gpuPalette, dithering);
+
+        return pipelines[pipehash];
+    }
+
+    id<MTLDepthStencilState> GetModVolDepthStencilStates(ModVolMode mode, int cullMode, bool naomi2)
+    {
+        u32 pipehash = hash(mode, cullMode, naomi2);
+        const auto &state = modVolStencilStates.find(pipehash);
+        if (state != modVolStencilStates.end() && state->second != nullptr)
+            return state->second;
+        CreateModVolDepthStencilState(mode, cullMode, naomi2);
+
+        return modVolStencilStates[pipehash];
+    }
+
+    id<MTLDepthStencilState> GetDepthPassDepthStencilStates(int cullMode, bool naomi2)
+    {
+        u32 pipehash = hash(cullMode, naomi2);
+        const auto &state = depthPassDepthStencilStates.find(pipehash);
+        if (state != depthPassDepthStencilStates.end() && state->second != nullptr)
+            return state->second;
+        CreateDepthPassDepthStencilState(cullMode, naomi2);
+
+        return depthPassDepthStencilStates[pipehash];
+    }
+
+    id<MTLDepthStencilState> GetDepthStencilStates(u32 listType, bool sortTriangles, bool shadowed, const PolyParam& pp)
+    {
+        u64 pipehash = hash(listType, sortTriangles, shadowed, &pp);
+
+        const auto &state = depthStencilStates.find(pipehash);
+        if (state != depthStencilStates.end() && state->second != nullptr)
+            return state->second;
+        CreateDepthStencilState(listType, sortTriangles, shadowed, pp);
+
+        return depthStencilStates[pipehash];
+    }
+
+private:
+    void CreateBlitPassPipeline();
+    void CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2);
+    void CreateDepthPassPipeline(int cullMode, bool naomi2);
+    void CreatePipeline(u32 listType, bool sortTriangles, const PolyParam& pp, int gpuPalette, bool dithering);
+
+    void CreateModVolDepthStencilState(ModVolMode mode, int cullMode, bool naomi2);
+    void CreateDepthPassDepthStencilState(int cullMode, bool naomi2);
+    void CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam& pp);
+
+    // Releases every object cached in `cache` (they are stored with a +1 retain count
+    // by the Create*() methods) and empties it. Entries whose creation failed are nil
+    // and releasing nil is a no-op.
+    template<typename Cache>
+    static void releaseAll(Cache& cache)
+    {
+        for (auto& entry : cache)
+            [entry.second release];
+        cache.clear();
+    }
+
+    // Cache key for the main render pipelines.
+    u64 hash(u32 listType, bool sortTriangles, const PolyParam *pp, int gpuPalette, bool dithering) const
+    {
+        u64 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3)
+            | (((pp->tileclip >> 28) == 3) << 4);
+        hash |= ((listType >> 1) << 5);
+        bool ignoreTexAlpha = pp->tsp.IgnoreTexA || pp->tcw.PixelFmt == Pixel565;
+        hash |= (pp->tsp.ShadInstr << 7) | (ignoreTexAlpha << 9) | (pp->tsp.UseAlpha << 10)
+            | (pp->tsp.ColorClamp << 11) | ((config::Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14)
+            | (pp->tsp.DstInstr << 17);
+        hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23);
+        hash |= ((u64)sortTriangles << 26) | ((u64)gpuPalette << 27) | ((u64)pp->isNaomi2() << 29);
+        hash |= (u64)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30;
+        hash |= (u64)(pp->tcw.PixelFmt == PixelBumpMap) << 31;
+        hash |= (u64)dithering << 32;
+
+        return hash;
+    }
+    // Cache key for the main depth-stencil states.
+    u64 hash(u32 listType, bool sortTriangles, bool shadowed, const PolyParam *pp) const
+    {
+        u64 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3)
+            | (((pp->tileclip >> 28) == 3) << 4);
+        hash |= ((listType >> 1) << 5);
+        bool ignoreTexAlpha = pp->tsp.IgnoreTexA || pp->tcw.PixelFmt == Pixel565;
+        hash |= (pp->tsp.ShadInstr << 7) | (ignoreTexAlpha << 9) | (pp->tsp.UseAlpha << 10)
+            | (pp->tsp.ColorClamp << 11) | ((config::Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14)
+            | (pp->tsp.DstInstr << 17);
+        hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23);
+        hash |= ((u64)sortTriangles << 26) | ((u64)pp->isNaomi2() << 29);
+        hash |= (u64)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30;
+        hash |= (u64)(pp->tcw.PixelFmt == PixelBumpMap) << 31;
+        // CreateDepthStencilState() builds a different stencil setup when `shadowed`
+        // is set, so it must be part of the key; otherwise the shadowed and the
+        // non-shadowed state of the same polygon share one cache slot.
+        hash |= (u64)shadowed << 32;
+
+        return hash;
+    }
+    // Cache key for modifier volume pipelines and depth-stencil states.
+    u32 hash(ModVolMode mode, int cullMode, bool naomi2) const
+    {
+        return ((int)mode << 2) | cullMode | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6);
+    }
+    // Cache key for depth pass pipelines and depth-stencil states.
+    u32 hash(int cullMode, bool naomi2) const
+    {
+        return cullMode | ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3);
+    }
+
+    // Describes the layout of `Vertex` as fetched from vertex buffer index 30.
+    // With full == false only the position attribute is declared; the naomi2 normal
+    // is added only when both `full` and `naomi2` are set.
+    // The returned descriptor is autoreleased: callers pass it straight to
+    // -setVertexDescriptor: and never release it themselves.
+    MTLVertexDescriptor* GetMainVertexInputDescriptor(bool full = true, bool naomi2 = false) const
+    {
+        MTLVertexDescriptor *vertexDesc = [[MTLVertexDescriptor alloc] init];
+
+        auto pos = [vertexDesc attributes][0]; // pos
+        [pos setFormat:MTLVertexFormatFloat3];
+        [pos setOffset:offsetof(Vertex, x)];
+        [pos setBufferIndex:30];
+
+        if (full) {
+            auto col = [vertexDesc attributes][1]; // base color
+            [col setFormat:MTLVertexFormatUChar4Normalized];
+            [col setOffset:offsetof(Vertex, col)];
+            [col setBufferIndex:30];
+
+            auto spc = [vertexDesc attributes][2]; // offset color
+            [spc setFormat:MTLVertexFormatUChar4Normalized];
+            [spc setOffset:offsetof(Vertex, spc)];
+            [spc setBufferIndex:30];
+
+            auto u = [vertexDesc attributes][3]; // tex coord
+            [u setFormat:MTLVertexFormatFloat2];
+            [u setOffset:offsetof(Vertex, u)];
+            [u setBufferIndex:30];
+
+            auto col1 = [vertexDesc attributes][4];
+            [col1 setFormat:MTLVertexFormatUChar4Normalized];
+            [col1 setOffset:offsetof(Vertex, col1)];
+            [col1 setBufferIndex:30];
+
+            auto spc1 = [vertexDesc attributes][5];
+            [spc1 setFormat:MTLVertexFormatUChar4Normalized];
+            [spc1 setOffset:offsetof(Vertex, spc1)];
+            [spc1 setBufferIndex:30];
+
+            auto u1 = [vertexDesc attributes][6]; // tex coord
+            [u1 setFormat:MTLVertexFormatFloat2];
+            [u1 setOffset:offsetof(Vertex, u1)];
+            [u1 setBufferIndex:30];
+
+            if (naomi2) {
+                auto nx = [vertexDesc attributes][7]; // naomi2 normal
+                [nx setFormat:MTLVertexFormatFloat3];
+                [nx setOffset:offsetof(Vertex, nx)];
+                [nx setBufferIndex:30];
+            }
+        }
+
+        auto layout = [vertexDesc layouts][30];
+        [layout setStride:sizeof(Vertex)];
+        [layout setStepRate:1];
+        [layout setStepFunction:MTLVertexStepFunctionPerVertex];
+
+        return [vertexDesc autorelease];
+    }
+
+    // Maps a blend instruction (0-7) to the Metal blend factor. "Other color" means
+    // the destination color for the source factor and the source color for the
+    // destination factor.
+    static inline MTLBlendFactor GetBlendFactor(u32 instr, bool src) {
+        switch (instr) {
+        case 0: // zero
+            return MTLBlendFactorZero;
+        case 1: // one
+            return MTLBlendFactorOne;
+        case 2: // other color
+            return src ? MTLBlendFactorDestinationColor : MTLBlendFactorSourceColor;
+        case 3: // inverse other color
+            return src ? MTLBlendFactorOneMinusDestinationColor : MTLBlendFactorOneMinusSourceColor;
+        case 4: // src alpha
+            return MTLBlendFactorSourceAlpha;
+        case 5: // inverse src alpha
+            return MTLBlendFactorOneMinusSourceAlpha;
+        case 6: // dst alpha
+            return MTLBlendFactorDestinationAlpha;
+        case 7: // inverse dst alpha
+            return MTLBlendFactorOneMinusDestinationAlpha;
+        default:
+            die("Unsupported blend instruction");
+            return MTLBlendFactorZero;
+        }
+    }
+
+    MetalRenderer *renderer;
+    id<MTLRenderPipelineState> blitPassPipeline = nil;
+    std::map<u64, id<MTLRenderPipelineState>> pipelines;
+    std::map<u32, id<MTLRenderPipelineState>> modVolPipelines;
+    std::map<u32, id<MTLRenderPipelineState>> depthPassPipelines;
+
+    std::map<u32, id<MTLDepthStencilState>> modVolStencilStates;
+    std::map<u64, id<MTLDepthStencilState>> depthStencilStates;
+    std::map<u32, id<MTLDepthStencilState>> depthPassDepthStencilStates;
+};
+
+// Metal compare function for each of the 8 depth modes, indexed by isp.DepthMode.
+static const MTLCompareFunction depthOps[] =
+{
+    MTLCompareFunctionNever,        // 0 Never
+    MTLCompareFunctionLess,         // 1 Less
+    MTLCompareFunctionEqual,        // 2 Equal
+    MTLCompareFunctionLessEqual,    // 3 Less Or Equal
+    MTLCompareFunctionGreater,      // 4 Greater
+    MTLCompareFunctionNotEqual,     // 5 Not Equal
+    MTLCompareFunctionGreaterEqual, // 6 Greater Or Equal
+    MTLCompareFunctionAlways,       // 7 Always
+};
\ No newline at end of file
diff --git 
a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm new file mode 100644 index 0000000000..18e87dc501 --- /dev/null +++ b/core/rend/metal/metal_pipeline.mm @@ -0,0 +1,325 @@ +/* + Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#include "metal_pipeline.h" + +#include "metal_context.h" +#include "metal_shaders.h" +#include "metal_renderer.h" + +MetalPipelineManager::MetalPipelineManager(MetalRenderer *renderer) { + this->renderer = renderer; +} + +void MetalPipelineManager::CreateBlitPassPipeline() { + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Blit Pass"]; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + + [descriptor setVertexFunction:renderer->GetShaders()->GetBlitVertexShader()]; + [descriptor setFragmentFunction:renderer->GetShaders()->GetBlitFragmentShader()]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nil) { + ERROR_LOG(RENDERER, "Failed to create Blit Pipeline State: %s", [error localizedDescription]);; + } + + [descriptor release]; + + blitPassPipeline = state; +} + +void MetalPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2) { + MTLVertexDescriptor *vertexDesc = 
nil; + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + + [descriptor setLabel:@"Mod Vol Pass"]; + + if (mode == ModVolMode::Final) { + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(false, naomi2)]; + } + else { + vertexDesc = [[MTLVertexDescriptor alloc] init]; + + auto layout = [vertexDesc layouts][30]; + [layout setStride:sizeof(float) * 3]; + + auto attribute = [vertexDesc attributes][0]; + [attribute setOffset:0]; + [attribute setBufferIndex:30]; + [attribute setFormat:MTLVertexFormatFloat3]; + + [descriptor setVertexDescriptor:vertexDesc]; + } + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:mode == ModVolMode::Final]; + [attachment setSourceRGBBlendFactor:MTLBlendFactorSourceAlpha]; + [attachment setDestinationRGBBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [attachment setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:MTLBlendFactorSourceAlpha]; + [attachment setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:mode != ModVolMode::Final ? 
MTLColorWriteMaskNone : MTLColorWriteMaskAll]; + [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + + [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + + ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + [descriptor setVertexFunction:renderer->GetShaders()->GetModVolVertexShader(shaderParams)]; + [descriptor setFragmentFunction:renderer->GetShaders()->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nullptr) { + ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + [descriptor release]; + if (vertexDesc) { + [vertexDesc release]; + } + + modVolPipelines[hash(mode, cullMode, naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthPassPipeline(int cullMode, bool naomi2) +{ + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Depth Pass"]; + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(false, false)]; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:false]; + [attachment setSourceRGBBlendFactor:MTLBlendFactorZero]; + [attachment setDestinationRGBBlendFactor:MTLBlendFactorZero]; + [attachment setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:MTLBlendFactorZero]; + [attachment setDestinationAlphaBlendFactor:MTLBlendFactorZero]; + [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:MTLColorWriteMaskNone]; + + // TODO: Need functions here + // descriptor->setVertexFunction(); + // descriptor->setFragmentFunction(); + + NSError *error = 
nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nil) { + ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + [descriptor release]; + + depthPassPipelines[hash(cullMode, naomi2)] = state; +} + +void MetalPipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const PolyParam &pp, int gpuPalette, bool dithering) { + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Main Draw"]; + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(true, pp.isNaomi2())]; + + u32 src = pp.tsp.SrcInstr; + u32 dst = pp.tsp.DstInstr; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:true]; + [attachment setSourceRGBBlendFactor:GetBlendFactor(src, true)]; + [attachment setDestinationRGBBlendFactor:GetBlendFactor(dst, false)]; + [attachment setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:GetBlendFactor(src, true)]; + [attachment setDestinationAlphaBlendFactor:GetBlendFactor(dst, false)]; + [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:MTLColorWriteMaskAll]; + [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + + [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + + VertexShaderParams vertParams = {}; + vertParams.gouraud = pp.pcw.Gouraud == 1; + vertParams.naomi2 = pp.isNaomi2(); + vertParams.divPosZ = divPosZ; + + FragmentShaderParams fragParams = {}; + fragParams.alphaTest = listType == ListType_Punch_Through; + fragParams.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; + fragParams.clamping = pp.tsp.ColorClamp; + fragParams.insideClipTest = 
(pp.tileclip >> 28) == 3; + fragParams.fog = config::Fog ? pp.tsp.FogCtrl : 2; + fragParams.gouraud = pp.pcw.Gouraud; + fragParams.ignoreTexAlpha = pp.tsp.IgnoreTexA || pp.tcw.PixelFmt == Pixel565; + fragParams.offset = pp.pcw.Offset; + fragParams.shaderInstr = pp.tsp.ShadInstr; + fragParams.texture = pp.pcw.Texture; + fragParams.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1; + fragParams.useAlpha = pp.tsp.UseAlpha; + fragParams.palette = gpuPalette; + fragParams.divPosZ = divPosZ; + fragParams.dithering = dithering; + + [descriptor setVertexFunction:renderer->GetShaders()->GetVertexShader(vertParams)]; + [descriptor setFragmentFunction:renderer->GetShaders()->GetFragmentShader(fragParams)]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nullptr) { + ERROR_LOG(RENDERER, "Failed to create Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + [descriptor release]; + + pipelines[hash(listType, sortTriangles, &pp, gpuPalette, dithering)] = state; +} + +void MetalPipelineManager::CreateModVolDepthStencilState(ModVolMode mode, int cullMode, bool naomi2) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + [descriptor setDepthWriteEnabled:false]; + [descriptor setDepthCompareFunction:mode == ModVolMode::Xor || mode == ModVolMode::Or ? 
MTLCompareFunctionGreater : MTLCompareFunctionAlways]; + + MTLStencilDescriptor *stencilDescriptor = [[MTLStencilDescriptor alloc] init]; + switch (mode) + { + case ModVolMode::Xor: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationInvert]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:0]; + [stencilDescriptor setWriteMask:2]; + break; + case ModVolMode::Or: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:2]; + [stencilDescriptor setWriteMask:2]; + break; + case ModVolMode::Inclusion: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionLessEqual]; + [stencilDescriptor setReadMask:3]; + [stencilDescriptor setWriteMask:3]; + break; + case ModVolMode::Exclusion: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionEqual]; + [stencilDescriptor setReadMask:3]; + [stencilDescriptor setWriteMask:3]; + break; + case ModVolMode::Final: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationZero]; + [stencilDescriptor 
setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionEqual]; + [stencilDescriptor setReadMask:0x81]; + [stencilDescriptor setWriteMask:3]; + break; + } + + [descriptor setFrontFaceStencil:stencilDescriptor]; + [descriptor setBackFaceStencil:stencilDescriptor]; + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + [descriptor release]; + [stencilDescriptor release]; // balance the alloc/init of the stencil descriptor, as CreateDepthStencilState does + modVolStencilStates[hash(mode, cullMode, naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthPassDepthStencilState(int cullMode, bool naomi2) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + [descriptor setLabel:@"Sorted Depth Pass"]; + [descriptor setDepthWriteEnabled:true]; + [descriptor setDepthCompareFunction:MTLCompareFunctionGreaterEqual]; + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + [descriptor release]; + + depthPassDepthStencilStates[hash(cullMode, naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam &pp) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + if (shadowed) + [descriptor setLabel:@"Main Shadowed Depth-Stencil State"]; + else + [descriptor setLabel:@"Main Depth-Stencil State"]; + + MTLCompareFunction compareFunction; + if (listType == ListType_Punch_Through || sortTriangles) { + compareFunction = MTLCompareFunctionGreaterEqual; + } else { + compareFunction = depthOps[pp.isp.DepthMode]; + } + + bool depthWriteEnabled; + if (sortTriangles) { + depthWriteEnabled = false; + } else { + // Z Write Disable seems to be ignored for punch-through. 
+ // Fixes Worms World Party, Bust-a-Move 4 and Re-Volt + if (listType == ListType_Punch_Through) { + depthWriteEnabled = true; + } else { + depthWriteEnabled = !pp.isp.ZWriteDis; + } + } + + MTLStencilDescriptor *stencilDescriptor = [[MTLStencilDescriptor alloc] init]; + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationKeep]; + + if (shadowed) { + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:0]; + [stencilDescriptor setWriteMask:0x80]; + } + + [descriptor setDepthCompareFunction:compareFunction]; + [descriptor setDepthWriteEnabled:depthWriteEnabled]; + + if (shadowed) { + [descriptor setBackFaceStencil:stencilDescriptor]; + [descriptor setFrontFaceStencil:stencilDescriptor]; + } + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + [descriptor release]; + [stencilDescriptor release]; + + depthStencilStates[hash(listType, sortTriangles, shadowed, &pp)] = state; +} diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h new file mode 100644 index 0000000000..2e4b82dd05 --- /dev/null +++ b/core/rend/metal/metal_renderer.h @@ -0,0 +1,144 @@ +/* + Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include "metal_pipeline.h" +#include "metal_shaders.h" +#include "metal_texture.h" +#include "metal_buffer.h" +#include "hw/pvr/Renderer_if.h" + +#include "rend/tileclip.h" +#include "rend/transform_matrix.h" + +class MetalRenderer final : public Renderer +{ +public: + bool Init() override; + void Term() override; + void Process(TA_context* ctx) override; + bool Render() override; + void RenderFramebuffer(const FramebufferInfo& info) override; + MetalShaders* GetShaders() { return &shaders; } + BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override;; + +private: + bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture); + void DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count); + void DrawSorted(id encoder, const std::vector& polys, u32 first, u32 last, bool multipass); + void DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector& polys, u32 first, u32 last); + void DrawModVols(id encoder, int first, int count); + void UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const FragmentShaderUniforms& fragmentUniforms); + void EndRenderPass(); + +protected: + TileClipping SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect); + void SetBaseScissor(MTLViewport viewport); + + void SetScissor(id encoder, const MTLScissorRect& scissor) + { + if (scissor.x != currentScissor.x || + scissor.y != currentScissor.y || + scissor.width != currentScissor.width || + scissor.height != currentScissor.height) + { + [encoder setScissorRect:scissor]; + currentScissor = scissor; + } + } + + MetalBufferData* GetMainBuffer(u32 size) + { + if (mainBuffer != nullptr) + { + if (mainBuffer->bufferSize < size) { + u32 newSize = (u32)mainBuffer->bufferSize; + while (newSize < size) + newSize *= 2; + + INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", 
mainBuffer->bufferSize, newSize); + [mainBuffer->buffer setPurgeableState: MTLPurgeableStateEmpty]; + [mainBuffer->buffer release]; + + mainBuffer = new MetalBufferData(newSize); + } + } + else + { + mainBuffer = new MetalBufferData(std::max(512 * 1024u, size)); + } + + return mainBuffer; + } + + template + T MakeFragmentUniforms() + { + T fragUniforms; + + //VERT and RAM fog color constants + FOG_COL_VERT.getRGBColor(fragUniforms.sp_FOG_COL_VERT); + FOG_COL_RAM.getRGBColor(fragUniforms.sp_FOG_COL_RAM); + + //Fog density constant + fragUniforms.sp_FOG_DENSITY = FOG_DENSITY.get() * config::ExtraDepthScale; + + pvrrc.fog_clamp_min.getRGBAColor(fragUniforms.colorClampMin); + pvrrc.fog_clamp_max.getRGBAColor(fragUniforms.colorClampMax); + + fragUniforms.cp_AlphaTestValue = (PT_ALPHA_REF & 0xFF) / 255.0f; + + return fragUniforms; + } + + void CheckFogTexture(); + void CheckPaletteTexture(); + + struct { + u64 indexOffset = 0; + u64 modVolOffset = 0; + u64 vertexUniformOffset = 0; + u64 fragmentUniformOffset = 0; + u64 naomi2OpaqueOffset = 0; + u64 naomi2PunchThroughOffset = 0; + u64 naomi2TranslucentOffset = 0; + u64 naomi2ModVolOffset = 0; + u64 naomi2TrModVolOffset = 0; + u64 lightsOffset = 0; + } offsets; + + bool renderPassStarted = false; + + MTLScissorRect baseScissor {}; + MTLScissorRect currentScissor {}; + TransformMatrix matrices; + + id frameBuffer = nil; + id depthBuffer = nil; + + id curMainBuffer = nil; + MetalBufferData* mainBuffer = nullptr; // created lazily by GetMainBuffer(), which tests this pointer against nullptr first + MetalPipelineManager pipelineManager = MetalPipelineManager(this); + MetalShaders shaders; + MetalTextureCache textureCache; + std::unique_ptr fogTexture; + std::unique_ptr paletteTexture; + MetalSamplers samplers; + bool frameRendered = false; + bool dithering = false; +}; diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm new file mode 100644 index 0000000000..8b2a687afc --- /dev/null +++ b/core/rend/metal/metal_renderer.mm @@ -0,0 +1,611 @@ +/* + Copyright 2024 flyinghead + + This file is 
part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ + +#include +#include + +#include "metal_renderer.h" +#include "hw/aica/dsp.h" +#include "hw/pvr/ta.h" +#include "hw/pvr/pvr_mem.h" + +bool MetalRenderer::Init() +{ + NOTICE_LOG(RENDERER, "Metal renderer initializing"); + + pipelineManager = MetalPipelineManager(this); + shaders = MetalShaders(); + samplers = MetalSamplers(); + + frameRendered = false; + + return true; +} + +void MetalRenderer::Term() { + pipelineManager.term(); + shaders.term(); + samplers.term(); + fogTexture = nullptr; + paletteTexture = nullptr; +} + +void MetalRenderer::Process(TA_context *ctx) { + if (!ctx->rend.isRTT) { + frameRendered = false; + if (!config::EmulateFramebuffer) + clearLastFrame = false; + } + + ta_parse(ctx, true); + + // TODO can't update fog or palette twice in multi render + CheckFogTexture(); + CheckPaletteTexture(); +} + +bool MetalRenderer::Render() { + if (pvrrc.isRTT) { + + } + else { + + } + + // TODO: Don't hardcode these values + matrices.CalcMatrices(&pvrrc, 1920, 1080); + Draw(fogTexture.get(), paletteTexture.get()); + // if (config::EmulateFramebuffer || pvrrc.isRTT) + // // delay ending the render pass in case of multi render + // EndRenderPass(); + + return true; +} + +void MetalRenderer::EndRenderPass() { + if (!renderPassStarted) + return; + + frameRendered = true; +} + +void MetalRenderer::RenderFramebuffer(const FramebufferInfo &info) { + +} + 
+BaseTextureCacheData *MetalRenderer::GetTexture(TSP tsp, TCW tcw) { + MetalTexture* tf = textureCache.getTextureCacheData(tsp, tcw); + + if (tf->NeedsUpdate()) { + if (!tf->Update()) { + tf= nullptr; + } + } + else if (tf->IsCustomTextureAvailable()) { + // TODO + } + + return tf; +} + +void MetalRenderer::CheckFogTexture() { + if (!fogTexture) + { + fogTexture = std::make_unique(); + fogTexture->tex_type = TextureType::_8; + updateFogTable = true; + } + if (!updateFogTable || !config::Fog) + return; + updateFogTable = false; + u8 texData[256]; + MakeFogTexture(texData); + + fogTexture->UploadToGPU(128, 2, texData, false); +} + +void MetalRenderer::CheckPaletteTexture() { + if (!paletteTexture) + { + paletteTexture = std::make_unique(); + paletteTexture->tex_type = TextureType::_8; + } + else if (!updatePalette) + return; + updatePalette = false; + + paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false); +} + +TileClipping MetalRenderer::SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect) { + int rect[4] = {}; + TileClipping clipMode = GetTileClip(val, matrices.GetViewportMatrix(), rect); + if (clipMode != TileClipping::Off) + { + clipRect.x = rect[0]; + clipRect.y = rect[1]; + clipRect.width = rect[2]; + clipRect.height = rect[3]; + } + if (clipMode == TileClipping::Outside) + SetScissor(encoder, clipRect); + else + SetScissor(encoder, baseScissor); + + return clipMode; +} + + +void MetalRenderer::SetBaseScissor(MTLViewport viewport) { + bool wide_screen_on = config::Widescreen + && !matrices.IsClipped() && !config::Rotate90 && !config::EmulateFramebuffer; + if (!wide_screen_on) + { + float width; + float height; + float min_x; + float min_y; + glm::vec4 clip_min(pvrrc.fb_X_CLIP.min, pvrrc.fb_Y_CLIP.min, 0, 1); + glm::vec4 clip_dim(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1, + pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1, 0, 0); + clip_min = matrices.GetScissorMatrix() * clip_min; + clip_dim = matrices.GetScissorMatrix() * clip_dim; + + 
min_x = clip_min[0]; + min_y = clip_min[1]; + width = clip_dim[0]; + height = clip_dim[1]; + if (width < 0) + { + min_x += width; + width = -width; + } + if (height < 0) + { + min_y += height; + height = -height; + } + + baseScissor = MTLScissorRect(); + baseScissor.x = std::max(lroundf(min_x), 0L); + baseScissor.y = std::max(lroundf(min_y), 0L); + baseScissor.width = std::max(lroundf(width), 0L); + baseScissor.height = std::max(lroundf(height), 0L); + } + else + { + baseScissor = MTLScissorRect(); + baseScissor.x = 0; + baseScissor.y = 0; + baseScissor.width = viewport.width; + baseScissor.height = viewport.height; + } +} + +void MetalRenderer::DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam &poly, u32 first, u32 count) +{ + MTLScissorRect scissorRect {}; + TileClipping tileClip = SetTileClip(encoder, poly.tileclip, scissorRect); + + float trilinearAlpha = 1.0f; + if (poly.tsp.FilterMode > 1 && poly.pcw.Texture && listType != ListType_Punch_Through && poly.tcw.MipMapped == 1) + { + trilinearAlpha = 0.25f * (poly.tsp.MipMapD & 0x3); + if (poly.tsp.FilterMode == 2) + // Trilinear pass A + trilinearAlpha = 1.0f - trilinearAlpha; + } + int gpuPalette = poly.texture == nullptr || !poly.texture->gpuPalette ? 
0 + : poly.tsp.FilterMode + 1; + float palette_index = 0.0f; + if (gpuPalette != 0) + { + if (config::TextureFiltering == 1) + gpuPalette = 1; + else if (config::TextureFiltering == 2) + gpuPalette = 2; + if (poly.tcw.PixelFmt == PixelPal4) + palette_index = float(poly.tcw.PalSelect << 4) / 1023.0f; + else + palette_index = float(poly.tcw.PalSelect >> 4 << 8) / 1023.0f; + } + + if (tileClip == TileClipping::Inside || trilinearAlpha != 1.0f || gpuPalette != 0) + { + const std::array pushConstants = { + (float)scissorRect.x, + (float)scissorRect.y, + (float)scissorRect.x + (float)scissorRect.width, + (float)scissorRect.y + (float)scissorRect.height, + trilinearAlpha, + palette_index + }; + + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) atIndex:1]; + } + + bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; + + [encoder setRenderPipelineState:pipelineManager.GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; + [encoder setDepthStencilState:pipelineManager.GetDepthStencilStates(listType, sortTriangles, shadowed, poly)]; + + if (shadowed) { + if (poly.pcw.Shadow != 0) { + [encoder setStencilReferenceValue:0x80]; + } else { + [encoder setStencilReferenceValue:0]; + } + } + + if (poly.texture != nullptr) { + auto texture = ((MetalTexture *)poly.texture)->texture; + [encoder setFragmentTexture:texture atIndex:0]; + + // Texture sampler: bound at index 0, the same index as the texture set just above + [encoder setFragmentSamplerState:samplers.GetSampler(poly, listType == ListType_Punch_Through) atIndex:0]; + } + + if (poly.pcw.Texture || poly.isNaomi2()) + { + u32 index = 0; + if (poly.isNaomi2()) + { + + } + + // TODO: Bind Texture & Naomi2 Lights Buffers + } + + MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? 
MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; + + [encoder drawIndexedPrimitives: primitive + indexCount: count + indexType: MTLIndexTypeUInt32 + indexBuffer: curMainBuffer + indexBufferOffset: offsets.indexOffset + first * sizeof(u32) + instanceCount: 1]; +} + +void MetalRenderer::DrawSorted(id encoder, const std::vector &polys, u32 first, u32 last, bool multipass) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawSorted"]; + + for (u32 idx = first; idx < last; idx++) + DrawPoly(encoder, ListType_Translucent, true, pvrrc.global_param_tr[polys[idx].polyIndex], polys[idx].first, polys[idx].count); + if (multipass && config::TranslucentPolygonDepthMask) + { + // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) + for (u32 idx = first; idx < last; idx++) + { + const SortedTriangle& param = polys[idx]; + const PolyParam& polyParam = pvrrc.global_param_tr[param.polyIndex]; + if (polyParam.isp.ZWriteDis) + continue; + [encoder setRenderPipelineState:pipelineManager.GetDepthPassPipeline(polyParam.isp.CullMode, polyParam.isNaomi2())]; + [encoder setDepthStencilState:pipelineManager.GetDepthPassDepthStencilStates(polyParam.isp.CullMode, polyParam.isNaomi2())]; + MTLScissorRect scissorRect {}; + SetTileClip(encoder, polyParam.tileclip, scissorRect); + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:param.count + indexType:MTLIndexTypeUInt32 indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32) + instanceCount:1]; + } + } + + [encoder popDebugGroup]; +} + +void MetalRenderer::DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector &polys, u32 first, u32 last) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawList"]; + + const PolyParam *pp_end = polys.data() + last; + for (const PolyParam *pp = &polys[first]; pp != pp_end; pp++) + if (pp->count > 2) + DrawPoly(encoder, listType, sortTriangles, *pp, pp->first, pp->count); 
+ + [encoder popDebugGroup]; +} + +void MetalRenderer::DrawModVols(id encoder, int first, int count) +{ + if (count == 0 || pvrrc.modtrig.empty() || !config::ModifierVolumes) + return; + + [encoder pushDebugGroup:@"DrawModVols"]; + [encoder setVertexBufferOffset:offsets.modVolOffset atIndex:30]; + + ModifierVolumeParam* params = &pvrrc.global_param_mvo[first]; + + int mod_base = -1; + id state; + id depth_state; + + const std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f }; + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) atIndex:1]; + + for (int cmv = 0; cmv < count; cmv++) { + ModifierVolumeParam& param = params[cmv]; + MTLCullMode cull_mode = param.isp.CullMode == 3 ? MTLCullModeBack : param.isp.CullMode == 2 ? MTLCullModeFront : MTLCullModeNone; + [encoder setCullMode:cull_mode]; + [encoder setFrontFacingWinding:MTLWindingCounterClockwise]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + if (mod_base == -1) + mod_base = param.first; + + if (!param.isp.VolumeLast && mv_mode > 0) { + state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); // OR'ing (open volume or quad) + depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); + } else { + state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); // XOR'ing (closed volume) + depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); + } + + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:2]; + MTLScissorRect scissorRect {}; + SetTileClip(encoder, param.tileclip, scissorRect); + // TODO inside clipping + + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart:param.first * 3 + vertexCount:param.count * 3 + instanceCount:1]; + + if (mv_mode == 1 || 
mv_mode == 2) + { + // Sum the area + state = pipelineManager.GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); + depth_state = pipelineManager.GetModVolDepthStencilStates(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:1]; + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart: mod_base * 3 + vertexCount: (param.first + param.count - mod_base) * 3 + instanceCount: 1]; + mod_base = -1; + } + } + [encoder setVertexBufferOffset:0 atIndex:30]; + + state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Final, 0, false); + depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Final, 0, false); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:0x81]; + [encoder drawIndexedPrimitives: MTLPrimitiveTypeTriangleStrip + indexCount: 4 + indexType: MTLIndexTypeUInt32 + indexBuffer: curMainBuffer + indexBufferOffset: offsets.indexOffset + instanceCount: 1]; + + [encoder popDebugGroup]; +} + +void MetalRenderer::UploadMainBuffer(const VertexShaderUniforms &vertexUniforms, const FragmentShaderUniforms &fragmentUniforms) { + BufferPacker packer; + + // Vertex + packer.add(pvrrc.verts.data(), pvrrc.verts.size() * sizeof(decltype(*pvrrc.verts.data()))); + // Modifier Volumes + offsets.modVolOffset = packer.add(pvrrc.modtrig.data(), pvrrc.modtrig.size() * sizeof(decltype(*pvrrc.modtrig.data()))); + // Index + offsets.indexOffset = packer.add(pvrrc.idx.data(), pvrrc.idx.size() * sizeof(decltype(*pvrrc.idx.data()))); + // Uniform buffers + offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); + offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); + + 
std::vector n2uniforms; + if (settings.platform.isNaomi2()) + { + // packNaomi2Uniforms(packer, offsets, n2uniforms, false); + // offsets.lightsOffset = packNaomi2Lights(packer); + } + + MetalBufferData *buffer = GetMainBuffer(packer.size()); + packer.upload(*buffer); + curMainBuffer = buffer->buffer; +} + +bool MetalRenderer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + + FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); + dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; + if (dithering) { + switch (pvrrc.fb_W_CTRL.fb_packmode) + { + case 0: // 0555 KRGB 16 bit + case 3: // 1555 ARGB 16 bit + fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = 31.f; + fragUniforms.ditherColorMax[3] = 255.f; + break; + case 1: // 565 RGB 16 bit + fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[2] = 31.f; + fragUniforms.ditherColorMax[1] = 63.f; + fragUniforms.ditherColorMax[3] = 255.f; + break; + case 2: // 4444 ARGB 16 bit + fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] + = fragUniforms.ditherColorMax[2] = fragUniforms.ditherColorMax[3] = 15.f; + break; + default: + break; + } + } + + currentScissor = MTLScissorRect {}; + + if (frameBuffer != nil) { + [frameBuffer setPurgeableState:MTLPurgeableStateEmpty]; + [frameBuffer release]; + frameBuffer = nil; + } + + if (depthBuffer != nil) { + [depthBuffer setPurgeableState:MTLPurgeableStateEmpty]; + [depthBuffer release]; + depthBuffer = nil; + } + + MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; + [desc setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [desc setWidth:pvrrc.framebufferWidth]; + [desc setHeight:pvrrc.framebufferHeight]; + [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + + frameBuffer = [MetalContext::Instance()->GetDevice() 
newTextureWithDescriptor:desc]; + [desc release]; + + MTLTextureDescriptor *depthDesc = [[MTLTextureDescriptor alloc] init]; + [depthDesc setPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [depthDesc setWidth:pvrrc.framebufferWidth]; + [depthDesc setHeight:pvrrc.framebufferHeight]; + [depthDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + + depthBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDesc]; + [depthDesc release]; + + auto drawable = [MetalContext::Instance()->GetLayer() nextDrawable]; + + id buffer = MetalContext::Instance()->commandBuffer; + MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init]; + auto color = [descriptor colorAttachments][0]; + [color setTexture:frameBuffer]; + [color setLoadAction:MTLLoadActionClear]; + [color setStoreAction:MTLStoreActionStore]; + + MTLRenderPassDepthAttachmentDescriptor *depthAttachmentDescriptor = [[MTLRenderPassDepthAttachmentDescriptor alloc] init]; + [depthAttachmentDescriptor setTexture:depthBuffer]; + [depthAttachmentDescriptor setLoadAction:MTLLoadActionClear]; + [depthAttachmentDescriptor setStoreAction:MTLStoreActionDontCare]; + + MTLRenderPassStencilAttachmentDescriptor *stencilAttachmentDescriptor = [[MTLRenderPassStencilAttachmentDescriptor alloc] init]; + [stencilAttachmentDescriptor setTexture:depthBuffer]; + [stencilAttachmentDescriptor setLoadAction:MTLLoadActionClear]; + [stencilAttachmentDescriptor setStoreAction:MTLStoreActionDontCare]; + + [descriptor setDepthAttachment:depthAttachmentDescriptor]; + [descriptor setStencilAttachment:stencilAttachmentDescriptor]; + + [depthAttachmentDescriptor release]; + [stencilAttachmentDescriptor release]; + + id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + + [descriptor release]; + + [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; + [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; + + // Fog sampler + TSP fogTsp = 
{}; + fogTsp.FilterMode = 1; + fogTsp.ClampU = 1; + fogTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers.GetSampler(fogTsp) atIndex:2]; + + // Palette sampler + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers.GetSampler(palTsp) atIndex:3]; + + // Upload vertex and index buffers + VertexShaderUniforms vtxUniforms {}; + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); + + UploadMainBuffer(vtxUniforms, fragUniforms); + + [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; + [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; + [renderEncoder setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; + + RenderPass previous_pass {}; + for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { + const RenderPass& current_pass = pvrrc.render_passes[render_pass]; + + DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, + current_pass.op_count - previous_pass.op_count, + current_pass.pt_count - previous_pass.pt_count, + current_pass.tr_count - previous_pass.tr_count, + current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); + DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(renderEncoder, ListType_Punch_Through, false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawModVols(renderEncoder, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); + if (current_pass.autosort) { + if (!config::PerStripSorting) + DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); + else + DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, 
current_pass.tr_count); + } else { + // TODO: This breaking? + // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } + previous_pass = current_pass; + } + + [renderEncoder endEncoding]; + + // Blit to framebuffer + descriptor = [[MTLRenderPassDescriptor alloc] init]; + color = [descriptor colorAttachments][0]; + [color setTexture:[drawable texture]]; + [color setLoadAction:MTLLoadActionClear]; + [color setStoreAction:MTLStoreActionStore]; + + renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + + [descriptor release]; + + [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; + [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; + [renderEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + [renderEncoder endEncoding]; + + [buffer presentDrawable:drawable]; + [buffer commit]; + [pool release]; + + DEBUG_LOG(RENDERER, "Render command buffer released"); + + MetalContext::Instance()->commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; + return !pvrrc.isRTT; +} + +Renderer* rend_Metal() +{ + return new MetalRenderer(); +} diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h new file mode 100644 index 0000000000..1f9e470121 --- /dev/null +++ b/core/rend/metal/metal_shaders.h @@ -0,0 +1,153 @@ +/* +Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include + +#include "types.h" +#include +#include + + +struct VertexShaderParams +{ + bool gouraud; + bool naomi2; + bool divPosZ; + + u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)divPosZ << 2); } +}; + +struct FragmentShaderParams +{ + bool alphaTest; + bool insideClipTest; + bool useAlpha; + bool texture; + bool ignoreTexAlpha; + int shaderInstr; + bool offset; + int fog; + bool gouraud; + bool bumpmap; + bool clamping; + bool trilinear; + int palette; + bool divPosZ; + bool dithering; + + u32 hash() + { + return ((u32)alphaTest) | ((u32)insideClipTest << 1) | ((u32)useAlpha << 2) + | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) + | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) + | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13) + | ((u32)palette << 14) | ((u32)divPosZ << 16) | ((u32)dithering << 17); + } +}; + +struct ModVolShaderParams +{ + bool naomi2; + bool divPosZ; + + u32 hash() { return (u32)naomi2 | ((u32)divPosZ << 1); } +}; + +// std140 alignment required +struct VertexShaderUniforms +{ + glm::mat4 ndcMat; +}; + +// std140 alignment required +struct FragmentShaderUniforms +{ + float colorClampMin[4]; + float colorClampMax[4]; + float sp_FOG_COL_RAM[4]; // Only using 3 elements but easier for std140 + float sp_FOG_COL_VERT[4]; // same comment + float ditherColorMax[4]; + float cp_AlphaTestValue; + float sp_FOG_DENSITY; +}; + +class MetalShaders +{ +public: + MetalShaders(); + + id GetBlitVertexShader() { return blitVertexShader; } + id GetBlitFragmentShader() { return blitFragmentShader; } + + id GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); } + id GetModVolFragmentShader(bool divPosZ) { + auto modVolFragmentShader = modVolFragmentShaders.find(divPosZ); + if (modVolFragmentShader != 
modVolFragmentShaders.end()) + return modVolFragmentShader->second; + + modVolFragmentShaders[divPosZ] = compileShader(divPosZ); + return modVolFragmentShaders[divPosZ]; + } + + id GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } + id GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } + + void term() + { + vertexShaders.clear(); + fragmentShaders.clear(); + + [vertexShaderConstants release]; + [fragmentShaderConstants release]; + [modVolShaderConstants release]; + } + +private: + id blitShaderLibrary; + id modVolShaderLibrary; + id vertexShaderLibrary; + id fragmentShaderLibrary; + MTLFunctionConstantValues* vertexShaderConstants; + MTLFunctionConstantValues* fragmentShaderConstants; + MTLFunctionConstantValues* modVolShaderConstants; + + template + id getShader(std::map> &map, T params) + { + u32 h = params.hash(); + auto it = map.find(h); + if (it != map.end()) + return it->second; + map[h] = compileShader(params); + return map[h]; + } + id compileShader(const VertexShaderParams& params); + id compileShader(const FragmentShaderParams& params); + id compileShader(const ModVolShaderParams& params); + id compileShader(bool divPosZ); + + id blitVertexShader; + id blitFragmentShader; + + std::map> modVolVertexShaders; + std::map> modVolFragmentShaders; + + std::map> vertexShaders; + std::map> fragmentShaders; +}; diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm new file mode 100644 index 0000000000..9b890fb6d1 --- /dev/null +++ b/core/rend/metal/metal_shaders.mm @@ -0,0 +1,572 @@ +/* +Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ + +#include "metal_shaders.h" + +#include "metal_context.h" + +static const char VertexShaderSource[] = R"( +#include +#include + +using namespace metal; + +constant bool pp_gouraud [[function_constant(0)]]; +constant bool div_pos_z [[function_constant(1)]]; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct VertexIn +{ + float4 in_pos [[attribute(0)]]; + float4 in_base [[attribute(1)]]; + float4 in_offs [[attribute(2)]]; + float2 in_uv [[attribute(3)]]; +}; + +struct VertexOut +{ + // TODO: Interpolation mode + float4 vtx_base; + float4 vtx_offs; + float3 vtx_uv; + float4 position [[position]]; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms& uniforms [[buffer(0)]]) +{ + float4 vpos = uniforms.ndc_mat * in.in_pos; + + if (div_pos_z) { + vpos /= vpos.z; + vpos.z = vpos.w; + } + + VertexOut out = {}; + out.vtx_base = in.in_base; + out.vtx_offs = in.in_offs; + out.vtx_uv = float3(in.in_uv, vpos.z); + + if (pp_gouraud && !div_pos_z) { + out.vtx_base *= vpos.z; + out.vtx_offs *= vpos.z; + } + + if (!div_pos_z) { + out.vtx_uv.xy *= vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + } + + out.position = vpos; + + return out; +} +)"; + +static const char FragmentShaderSource[] = R"( +#include +#define PI 3.1415926 + +using namespace metal; + +constant bool cp_alpha_test [[function_constant(0)]]; +constant bool pp_clip_inside [[function_constant(1)]]; +constant bool pp_use_alpha [[function_constant(2)]]; +constant bool pp_texture [[function_constant(3)]]; +constant bool pp_ignore_tex_a [[function_constant(4)]]; +constant int pp_shad_instr [[function_constant(5)]]; +constant bool 
pp_offset [[function_constant(6)]]; +constant int pp_fog_ctrl [[function_constant(7)]]; +constant bool pp_gouraud [[function_constant(8)]]; +constant bool pp_bump_map [[function_constant(9)]]; +constant bool color_clamping [[function_constant(10)]]; +constant bool pp_trilinear [[function_constant(11)]]; +constant int pp_palette [[function_constant(12)]]; +constant bool div_pos_z [[function_constant(13)]]; +constant bool dithering [[function_constant(14)]]; + +constant bool has_fog_table = pp_fog_ctrl != 2; +constant bool has_palette = pp_palette != 0; + +struct FragmentShaderUniforms +{ + float4 color_clamp_min; + float4 color_clamp_max; + float4 sp_fog_col_ram; + float4 sp_fog_col_vert; + float4 dither_color_max; + float cp_alpha_test_value; + float sp_fog_density; +}; + +struct PushBlock +{ + float4 clip_test; + float trilinear_alpha; + float palette_index; +}; + +struct VertexOut +{ + // TODO: Interpolation mode + float4 vtx_base; + float4 vtx_offs; + float3 vtx_uv; + float4 position [[position]]; +}; + +struct FragmentOut +{ + float4 color [[color(0)]]; + float depth [[depth(any)]]; +}; + +float fog_mode2(float w, constant FragmentShaderUniforms& uniforms, + texture2d fog_table, sampler fog_table_sampler) +{ + float z = 0.0; + + if (div_pos_z) { + z = clamp(uniforms.sp_fog_density / w, 1.0, 255.9999); + } else { + z = clamp(uniforms.sp_fog_density * w, 1.0, 255.9999); + } + + float exp = floor(log2(z)); + float m = z * 16.0 / pow(2.0, exp) - 16.0; + float idx = floor(m) + exp * 16.0 + 0.5; + float4 fog_coef = fog_table.sample(fog_table_sampler, float2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); + return fog_coef.r; +} + +float4 color_clamp(float4 col, constant FragmentShaderUniforms& uniforms) +{ + if (color_clamping) + { + return clamp(col, uniforms.color_clamp_min, uniforms.color_clamp_max); + } else { + return col; + } +} + +float4 get_palette_entry(texture2d palette, sampler palette_sampler, + float col_idx, constant PushBlock& push_constants) +{ + float2 c 
= float2(col_idx * 255.0 / 1023.0 + push_constants.palette_index, 0.5); + return palette.sample(palette_sampler, c); +} + +float4 palette_pixel(texture2d texture, sampler texture_sampler, + texture2d palette, sampler palette_sampler, + float3 coords, constant PushBlock& push_constants) +{ + if (div_pos_z) { + return get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, coords.xy).r, push_constants); + } else { + return get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, float2(coords.xy / coords.z)).r, push_constants); + } +} + +float4 palette_pixel_bilinear(texture2d texture, sampler texture_sampler, + texture2d palette, sampler palette_sampler, + float3 coords, constant PushBlock& push_constants) +{ + if (div_pos_z) { + coords.xy /= coords.z; + } + + float2 tex_size = float2(texture.get_width(), texture.get_height()); + float2 pix_coord = coords.xy * tex_size - 0.5; // Coordinates of top left pixel + float2 origin_pix_coords = floor(pix_coord); + + float2 sample_uv = (origin_pix_coords + 0.5) / tex_size; // UV coordinates of center of top left pixel + + // Sample from all surrounding texels + float4 c00 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv).r, push_constants); + float4 c01 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(0, 1)).r, push_constants); + float4 c11 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(1, 1)).r, push_constants); + float4 c10 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(1, 0)).r, push_constants); + + float2 weight = pix_coord - origin_pix_coords; + + // Bi-linear mixing + float4 temp0 = mix(c00, c10, weight.x); + float4 temp1 = mix(c01, c11, weight.x); + return mix(temp0, temp1, weight.y); +} + +fragment FragmentOut fs_main(VertexOut in [[stage_in]], constant FragmentShaderUniforms& uniforms [[buffer(0)]], + 
constant PushBlock& push_constants [[buffer(1)]], + texture2d tex [[texture(0), function_constant(pp_texture)]], sampler tex_sampler [[sampler(0), function_constant(pp_texture)]], + texture2d fog_table [[texture(2), function_constant(has_fog_table)]], sampler fog_table_sampler [[sampler(2), function_constant(has_fog_table)]], + texture2d palette [[texture(3), function_constant(has_palette)]], sampler palette_sampler [[sampler(3), function_constant(has_palette)]]) +{ + // Clip inside the box + if (pp_clip_inside) { + if (in.position.x >= push_constants.clip_test.x && in.position.x <= push_constants.clip_test.z + && in.position.y >= push_constants.clip_test.y && in.position.y <= push_constants.clip_test.w) + discard_fragment(); + } + + float4 color = in.vtx_base; + float4 offset = in.vtx_offs; + + if (pp_gouraud && !div_pos_z) { + color /= in.vtx_uv.z; + offset /= in.vtx_uv.z; + } + + if (!pp_use_alpha) { + color.a = 1.0; + } + + if (pp_fog_ctrl == 3) { + color = float4(uniforms.sp_fog_col_ram.rgb, fog_mode2(in.vtx_uv.z, uniforms, fog_table, fog_table_sampler)); + } + + if (pp_texture) { + float4 tex_col; + + if (pp_palette == 0) { + if (div_pos_z) { + tex_col = tex.sample(tex_sampler, in.vtx_uv.xy); + } else { + tex_col = tex.sample(tex_sampler, float2(in.vtx_uv.xy / in.vtx_uv.z)); + } + } else { + if (pp_palette == 1) { + tex_col = palette_pixel(tex, tex_sampler, palette, palette_sampler, in.vtx_uv, push_constants); + } else { + tex_col = palette_pixel_bilinear(tex, tex_sampler, palette, palette_sampler, in.vtx_uv, push_constants); + } + } + + if (pp_bump_map) { + float s = PI / 2.0 * (tex_col.a * 15.0 * 16.0 + tex_col.r * 15.0) / 255.0; + float r = 2.0 * PI * (tex_col.g * 15.0 * 16.0 + tex_col.b * 15.0) / 255.0; + tex_col.a = clamp(offset.a + offset.r * sin(s) + offset.g * cos(s) * cos(r - 2.0 * PI * offset.b), 0.0, 1.0); + tex_col.rgb = float3(1.0, 1.0, 1.0); + } else { + if (pp_ignore_tex_a) + tex_col.a = 1.0; + } + + if (pp_shad_instr == 0) { + color = tex_col; 
+ } else if (pp_shad_instr == 1) { + color.rgb *= tex_col.rgb; + color.a = tex_col.a; + } else if (pp_shad_instr == 2) { + color.rgb = mix(color.rgb, tex_col.rgb, tex_col.a); + } else if (pp_shad_instr == 3) { + color *= tex_col; + } + + if (pp_offset && !pp_bump_map) { + color.rgb += offset.rgb; + } + } + + color = color_clamp(color, uniforms); + + if (pp_fog_ctrl == 0) { + color.rgb = mix(color.rgb, uniforms.sp_fog_col_ram.rgb, fog_mode2(in.vtx_uv.z, uniforms, fog_table, fog_table_sampler)); + } + + if (pp_fog_ctrl == 1 && pp_offset && !pp_bump_map) { + color.rgb = mix(color.rgb, uniforms.sp_fog_col_vert.rgb, offset.a); + } + + if (pp_trilinear) + color *= push_constants.trilinear_alpha; + + if (cp_alpha_test) { + color.a = round(color.a * 255.0) / 255.0; + if (uniforms.cp_alpha_test_value > color.a) + discard_fragment(); + color.a = 1.0; + } + + float w; + + if (div_pos_z) { + w = 100000.0 / in.vtx_uv.z; + } else { + w = 100000.0 * in.vtx_uv.z; + } + + float depth = log2(1.0 + max(w, -0.999999)) / 34.0; + + if (dithering) { + constexpr float dither_table[16] = { + 0.9375, 0.1875, 0.75, 0.0, + 0.4375, 0.6875, 0.25, 0.5, + 0.8125, 0.0625, 0.875, 0.125, + 0.3125, 0.5625, 0.375, 0.625 + }; + + float r = dither_table[int(fmod(in.position.y, 4.0)) * 4 + int(fmod(in.position.x, 4.0))]; + // 31 for 5-bit color, 63 for 6-bits, 15 for 4 bits + color += r / uniforms.dither_color_max; + // Avoid rounding + color = floor(color * 255.0) / 255.0; + } + + return FragmentOut { color, depth }; +} +)"; + +static const char BlitShader[] = R"( +#include +using namespace metal; + +struct VertexOut { + float4 position [[position]]; + float2 texCoord; +}; + +vertex VertexOut vs_main(uint vertexID [[vertex_id]]) { + // Predefined positions and texture coordinates for a full-screen quad + float4 positions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), // Bottom-left + float4( 1.0, -1.0, 0.0, 1.0), // Bottom-right + float4(-1.0, 1.0, 0.0, 1.0), // Top-left + float4( 1.0, 1.0, 0.0, 1.0) // 
Top-right + }; + + float2 texCoords[4] = { + float2(0.0, 1.0), // Bottom-left + float2(1.0, 1.0), // Bottom-right + float2(0.0, 0.0), // Top-left + float2(1.0, 0.0) // Top-right + }; + + VertexOut out; + out.position = positions[vertexID]; + out.texCoord = texCoords[vertexID]; + return out; +} + +fragment float4 fs_main(VertexOut in [[stage_in]], + texture2d sourceTexture [[texture(0)]]) { + constexpr sampler textureSampler(mag_filter::linear, min_filter::linear); + return sourceTexture.sample(textureSampler, in.texCoord); +} +)"; + +static const char ModVolShaderSource[] = R"( +#include +#include + +using namespace metal; + +constant bool div_pos_z [[function_constant(0)]]; /* index 0 is the only slot the mod-vol compileShader overloads fill (setConstantValue ... atIndex:0) */ + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct VertexIn +{ + float4 in_pos [[attribute(0)]]; +}; + +struct VertexOut +{ + float depth; + float4 position [[position]]; +}; + +struct FragmentOut +{ + float4 color [[color(0)]]; + float depth [[depth(any)]]; +}; + +struct PushBlock +{ + float sp_shader_color; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms& uniforms [[buffer(0)]]) { + float4 vpos = uniforms.ndc_mat * in.in_pos; + + VertexOut out = {}; + + if (div_pos_z) { + vpos /= vpos.z; + vpos.z = vpos.w; + out.depth = vpos.w; + } else { + out.depth = vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + } + + out.position = vpos; + return out; +} + +fragment FragmentOut fs_main(VertexOut in [[stage_in]], + constant PushBlock& push_constants [[buffer(1)]]) { + FragmentOut out = {}; + + float w; + + if (div_pos_z) { + w = 100000.0 / in.depth; + } else { + w = 100000.0 * in.depth; + } + + out.depth = log2(1.0 + max(w, -0.999999)) / 34.0; + out.color = float4(0.0, 0.0, 0.0, push_constants.sp_shader_color); + return out; +} +)"; + +// TODO: Handle gouraud interpolation +// TODO: N2 Shaders + +MetalShaders::MetalShaders() { + auto device = MetalContext::Instance()->GetDevice(); + + NSError* error = nil; + fragmentShaderLibrary = [device
newLibraryWithSource:[NSString stringWithUTF8String:FragmentShaderSource] options:nil error:&error]; + fragmentShaderConstants = [[MTLFunctionConstantValues alloc] init]; + + if (!fragmentShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + vertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:VertexShaderSource] options:nil error:&error]; + vertexShaderConstants = [[MTLFunctionConstantValues alloc] init]; + + if (!vertexShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + blitShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:BlitShader] options:nil error:&error]; + + if (!blitShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + blitVertexShader = [blitShaderLibrary newFunctionWithName:@"vs_main" constantValues:[[MTLFunctionConstantValues alloc] init] error:&error]; + + if (!blitVertexShader) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + blitFragmentShader = [blitShaderLibrary newFunctionWithName:@"fs_main" constantValues:[[MTLFunctionConstantValues alloc] init] error:&error]; + + if (!blitFragmentShader) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + modVolShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:ModVolShaderSource] options:nil error:&error]; + modVolShaderConstants = [[MTLFunctionConstantValues alloc] init]; +} + +id MetalShaders::compileShader(const VertexShaderParams &params) { /* specializes vs_main: constant 0 = pp_gouraud, constant 1 = div_pos_z, as declared in VertexShaderSource */ + [vertexShaderConstants setConstantValue:&params.gouraud type:MTLDataTypeBool atIndex:0]; + [vertexShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:1]; + + NSError* error = nil; + + id function = [vertexShaderLibrary newFunctionWithName:@"vs_main" constantValues:vertexShaderConstants error:&error];
+ if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + +id MetalShaders::compileShader(const FragmentShaderParams &params) { /* indices 0-14 follow the [[function_constant(n)]] declarations in FragmentShaderSource */ + [fragmentShaderConstants setConstantValue:&params.alphaTest type:MTLDataTypeBool atIndex:0]; + [fragmentShaderConstants setConstantValue:&params.insideClipTest type:MTLDataTypeBool atIndex:1]; + [fragmentShaderConstants setConstantValue:&params.useAlpha type:MTLDataTypeBool atIndex:2]; + [fragmentShaderConstants setConstantValue:&params.texture type:MTLDataTypeBool atIndex:3]; + [fragmentShaderConstants setConstantValue:&params.ignoreTexAlpha type:MTLDataTypeBool atIndex:4]; + [fragmentShaderConstants setConstantValue:&params.shaderInstr type:MTLDataTypeInt atIndex:5]; + [fragmentShaderConstants setConstantValue:&params.offset type:MTLDataTypeBool atIndex:6]; + [fragmentShaderConstants setConstantValue:&params.fog type:MTLDataTypeInt atIndex:7]; + [fragmentShaderConstants setConstantValue:&params.gouraud type:MTLDataTypeBool atIndex:8]; + [fragmentShaderConstants setConstantValue:&params.bumpmap type:MTLDataTypeBool atIndex:9]; + [fragmentShaderConstants setConstantValue:&params.clamping type:MTLDataTypeBool atIndex:10]; + [fragmentShaderConstants setConstantValue:&params.trilinear type:MTLDataTypeBool atIndex:11]; + [fragmentShaderConstants setConstantValue:&params.palette type:MTLDataTypeInt atIndex:12]; + [fragmentShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:13]; + [fragmentShaderConstants setConstantValue:&params.dithering type:MTLDataTypeBool atIndex:14]; + + NSError* error = nil; + + id function = [fragmentShaderLibrary newFunctionWithName:@"fs_main" constantValues:fragmentShaderConstants error:&error]; + + if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + +id MetalShaders::compileShader(const ModVolShaderParams &params) { + [modVolShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool
atIndex:0]; + + NSError* error = nil; + + // TODO: Naomi2 ModVol Frag Shader + id function = [modVolShaderLibrary newFunctionWithName:@"vs_main" constantValues:modVolShaderConstants error:&error]; + + if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + +id MetalShaders::compileShader(bool divPosZ) { + [modVolShaderConstants setConstantValue:&divPosZ type:MTLDataTypeBool atIndex:0]; + + NSError* error = nil; + + id function = [modVolShaderLibrary newFunctionWithName:@"fs_main" constantValues:modVolShaderConstants error:&error]; + + if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h new file mode 100644 index 0000000000..f9a22fa72e --- /dev/null +++ b/core/rend/metal/metal_texture.h @@ -0,0 +1,114 @@ +/* + Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ +#pragma once +#include "rend/TexCache.h" +#include "metal_context.h" +#include +#include + +class MetalTexture final : public BaseTextureCacheData +{ +public: + MetalTexture(TSP tsp = {}, TCW tcw = {}) : BaseTextureCacheData(tsp, tcw) {} + id texture; + + + std::string GetId() override { return std::to_string([texture gpuResourceID]._impl); } + void UploadToGPU(int width, int height, const u8 *temp_tex_buffer, bool mipmapped, bool mipmapsIncluded = false) override; + bool Delete() override; +}; + +class MetalSamplers +{ +public: + explicit MetalSamplers(); + ~MetalSamplers(); + + static const u32 TSP_Mask = 0x7ef00; + + void term() { + for (auto &[u, samp] : samplers) { + [samp release]; + } + + samplers.clear(); + } + + id GetSampler(const PolyParam& poly, bool punchThrough, bool texture1 = false) { + TSP tsp = texture1 ? poly.tsp1 : poly.tsp; + if (poly.texture != nullptr && poly.texture->gpuPalette) + tsp.FilterMode = 0; + else if (config::TextureFiltering == 1) + tsp.FilterMode = 0; + else if (config::TextureFiltering == 2) + tsp.FilterMode = 1; + return GetSampler(tsp, punchThrough); + } + + id GetSampler(TSP tsp, bool punchThrough = false) { + const u32 hash = (tsp.full & TSP_Mask) | punchThrough; // MipMapD, FilterMode, ClampU, ClampV, FlipU, FlipV + id sampler = samplers[hash]; + + if (!sampler) { + auto desc = [[MTLSamplerDescriptor alloc] init]; + + if (tsp.FilterMode != 0) { + if (punchThrough) { + [desc setMinFilter:MTLSamplerMinMagFilterLinear]; + [desc setMagFilter:MTLSamplerMinMagFilterLinear]; + [desc setMipFilter:MTLSamplerMipFilterNearest]; + } else { + [desc setMinFilter:MTLSamplerMinMagFilterLinear]; + [desc setMagFilter:MTLSamplerMinMagFilterLinear]; + [desc setMipFilter:MTLSamplerMipFilterLinear]; + } + } + else { + [desc setMinFilter:MTLSamplerMinMagFilterNearest]; + [desc setMagFilter:MTLSamplerMinMagFilterNearest]; + [desc setMipFilter:MTLSamplerMipFilterNearest]; + } + + auto sRepeat = tsp.ClampU ? 
MTLSamplerAddressModeClampToEdge : tsp.FlipU ? MTLSamplerAddressModeMirrorRepeat : MTLSamplerAddressModeRepeat; + auto tRepeat = tsp.ClampV ? MTLSamplerAddressModeClampToEdge : tsp.FlipV ? MTLSamplerAddressModeMirrorRepeat : MTLSamplerAddressModeRepeat; + + [desc setSAddressMode:sRepeat]; + [desc setTAddressMode:tRepeat]; + [desc setRAddressMode:tRepeat]; + [desc setCompareFunction:MTLCompareFunctionNever]; + [desc setMaxAnisotropy:config::AnisotropicFiltering]; + + sampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:desc]; + + [desc release]; + + samplers.emplace(hash, sampler).first->second; + } + + return sampler; + } + +private: + std::unordered_map> samplers; +}; + +class MetalTextureCache final : public BaseTextureCache +{ + +}; \ No newline at end of file diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm new file mode 100644 index 0000000000..cc480c0996 --- /dev/null +++ b/core/rend/metal/metal_texture.mm @@ -0,0 +1,90 @@ +/* + Copyright 2024 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#include "metal_texture.h" + +void MetalTexture::UploadToGPU(int width, int height, const u8 *temp_tex_buffer, bool mipmapped, bool mipmapsIncluded) +{ + MTLPixelFormat format = MTLPixelFormatInvalid; + + u32 bpp = 2; + switch (tex_type) + { + case TextureType::_5551: + format = MTLPixelFormatA1BGR5Unorm; + break; + case TextureType::_565: + format = MTLPixelFormatB5G6R5Unorm; + break; + case TextureType::_4444: + format = MTLPixelFormatABGR4Unorm; + break; + case TextureType::_8888: + bpp = 4; + format = MTLPixelFormatRGBA8Unorm; + break; + case TextureType::_8: + bpp = 1; + format = MTLPixelFormatR8Unorm; + break; + } + + int mipmapLevels = 1; + if (mipmapsIncluded) + { + mipmapLevels = 0; + int dim = width; + while (dim != 0) + { + mipmapLevels++; + dim >>= 1; + } + } + + MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; + + [desc setWidth:width]; + [desc setHeight:height]; + [desc setPixelFormat:format]; + [desc setMipmapLevelCount:mipmapLevels]; + [desc setStorageMode:MTLStorageModeShared]; + [desc setUsage:MTLTextureUsageShaderRead]; + + auto device = MetalContext::Instance()->GetDevice(); + + texture = [device newTextureWithDescriptor:desc]; + [desc release]; + + MTLRegion region = { 0, 0, static_cast(width), static_cast(height) }; + [texture replaceRegion:region mipmapLevel:0 withBytes:temp_tex_buffer bytesPerRow:bpp * width]; +} + +bool MetalTexture::Delete() +{ + [texture setPurgeableState:MTLPurgeableStateEmpty]; + [texture release]; + texture = nil; + + return true; +} + +MetalSamplers::MetalSamplers() = default; +MetalSamplers::~MetalSamplers() { + term(); +} diff --git a/core/rend/transform_matrix.h b/core/rend/transform_matrix.h index d793272725..54936b0914 100644 --- a/core/rend/transform_matrix.h +++ b/core/rend/transform_matrix.h @@ -55,7 +55,7 @@ inline static void getPvrFramebufferSize(const rend_context& rendCtx, int& width // +Y is up in clip, NDC and framebuffer coordinates // Vulkan: // +Y is down in clip, NDC and 
framebuffer coordinates -// DirectX9: +// DirectX9 & Metal: // +Y is up in clip and NDC coordinates, but down in framebuffer coordinates // Y must also be flipped for render-to-texture so that the top of the texture comes first enum CoordSystem { COORD_OPENGL, COORD_VULKAN, COORD_DIRECTX }; diff --git a/core/types.h b/core/types.h index f33a8caa07..f434429de2 100644 --- a/core/types.h +++ b/core/types.h @@ -102,6 +102,7 @@ enum class RenderType { DirectX9 = 1, DirectX11 = 2, DirectX11_OIT = 6, + Metal = 7 }; static inline bool isOpenGL(RenderType renderType) { @@ -113,6 +114,9 @@ static inline bool isVulkan(RenderType renderType) { static inline bool isDirectX(RenderType renderType) { return renderType == RenderType::DirectX9 || renderType == RenderType::DirectX11 || renderType == RenderType::DirectX11_OIT; } +static inline bool isMetal(RenderType renderType) { + return renderType == RenderType::Metal; +} enum class KeyboardLayout { JP = 1, diff --git a/core/ui/gui.cpp b/core/ui/gui.cpp index 5540f123a3..98bc5dac94 100644 --- a/core/ui/gui.cpp +++ b/core/ui/gui.cpp @@ -2198,6 +2198,9 @@ static void gui_settings_video() renderApi = 3; perPixel = true; break; + case RenderType::Metal: + renderApi = 4; + perPixel = true; } constexpr int apiCount = 0 @@ -2212,6 +2215,9 @@ static void gui_settings_video() #endif #ifdef USE_DX11 + 1 + #endif + #ifdef USE_METAL + + 1 #endif ; @@ -2227,7 +2233,7 @@ static void gui_settings_video() #endif #ifdef USE_VULKAN #ifdef __APPLE__ - ImGui::RadioButton("Vulkan (Metal)", &renderApi, 1); + ImGui::RadioButton("Vulkan (MoltenVK)", &renderApi, 1); ImGui::SameLine(0, innerSpacing); ShowHelpMarker("MoltenVK: An implementation of Vulkan that runs on Apple's Metal graphics framework"); #else @@ -2235,6 +2241,12 @@ static void gui_settings_video() #endif // __APPLE__ ImGui::NextColumn(); #endif +#ifdef USE_METAL +#ifdef __APPLE__ + ImGui::RadioButton("Metal", &renderApi, 4); + ImGui::NextColumn(); +#endif +#endif #ifdef USE_DX9 { 
DisabledScope _(settings.platform.isNaomi2()); @@ -2565,6 +2577,8 @@ static void gui_settings_video() case 3: config::RendererType = perPixel ? RenderType::DirectX11_OIT : RenderType::DirectX11; break; + case 4: + config::RendererType = RenderType::Metal; } } diff --git a/core/ui/mainui.cpp b/core/ui/mainui.cpp index 4b1ad55b5b..0cc7a22542 100644 --- a/core/ui/mainui.cpp +++ b/core/ui/mainui.cpp @@ -99,8 +99,8 @@ void mainui_loop(bool forceStart) if (config::RendererType != currentRenderer || forceReinit) { mainui_term(); - int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : currentRenderer == RenderType::DirectX9 ? 2 : 3; - int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : config::RendererType == RenderType::DirectX9 ? 2 : 3; + int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : currentRenderer == RenderType::DirectX9 ? 2 : isMetal(currentRenderer) ? 4 : 3; /* 3 = DirectX11 and DirectX11_OIT, 4 = Metal */ + int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : config::RendererType == RenderType::DirectX9 ? 2 : isMetal(config::RendererType) ? 4 : 3; /* must classify the requested renderer, not currentRenderer */ if (newApi != prevApi || forceReinit) switchRenderApi(); mainui_init(); diff --git a/core/wsi/switcher.cpp b/core/wsi/switcher.cpp index 8d7e4dd23b..de8e65fce2 100644 --- a/core/wsi/switcher.cpp +++ b/core/wsi/switcher.cpp @@ -30,7 +30,11 @@ VulkanContext theVulkanContext; #endif +#ifdef USE_METAL +#include "rend/metal/metal_context.h" +MetalContext theMetalContext; +#endif GraphicsContext *GraphicsContext::instance; void initRenderApi(void *window, void *display) @@ -69,6 +73,18 @@ void initRenderApi(void *window, void *display) config::RendererType = RenderType::OpenGL; } #endif +#ifdef USE_METAL + if (isMetal(config::RendererType)) + { + theMetalContext.setWindow(window, display); + + if (theMetalContext.init()) + return; + // Fall back to OpenGL + WARN_LOG(RENDERER, "Metal init failed.
Falling back to OpenGL."); + config::RendererType = RenderType::OpenGL; + } +#endif #ifdef USE_OPENGL if (!isOpenGL(config::RendererType)) config::RendererType = RenderType::OpenGL; From 7bbec8da125db99861edbf919e6819a6e61324f6 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 19:58:02 -0400 Subject: [PATCH 02/48] Hopefully fix build Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.h | 11 ++++++++--- core/rend/metal/metal_context.mm | 4 ++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 65fea3ed2c..4455d926ba 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -17,8 +17,11 @@ Copyright 2024 flyinghead along with Flycast. If not, see . */ #pragma once + +#ifdef __OBJC__ #include #include +#endif #include "wsi/context.h" @@ -31,16 +34,16 @@ class MetalContext : public GraphicsContext bool init(); void term() override; +#ifdef __OBJC__ id GetDevice() const { return device; } CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } id commandBuffer = nil; +#endif void resize() override; void Present(); - std::string getDriverName() override { - return [[device name] UTF8String]; - } + std::string getDriverName() override; std::string getDriverVersion() override { return ""; @@ -56,9 +59,11 @@ class MetalContext : public GraphicsContext static MetalContext* Instance() { return contextInstance; } private: +#ifdef __OBJC__ NSAutoreleasePool *pool; id device = MTLCreateSystemDefaultDevice(); id queue = nil; CAMetalLayer* layer; +#endif static MetalContext* contextInstance; }; \ No newline at end of file diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index a92db6a0a6..5e3dc38c76 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -63,6 +63,10 @@ } +std::string MetalContext::getDriverName() { + return [[device name] 
UTF8String]; +} + void MetalContext::Present() { } From 31d07f7add38a8ce4ffc193939743ed7ce9bc196 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 20:10:26 -0400 Subject: [PATCH 03/48] Remove explicit releases Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_buffer.h | 1 - core/rend/metal/metal_context.h | 1 - core/rend/metal/metal_context.mm | 2 -- core/rend/metal/metal_driver.h | 2 -- core/rend/metal/metal_pipeline.mm | 18 ------------------ core/rend/metal/metal_renderer.h | 1 - core/rend/metal/metal_renderer.mm | 14 -------------- core/rend/metal/metal_shaders.h | 4 ---- core/rend/metal/metal_texture.h | 6 ------ core/rend/metal/metal_texture.mm | 2 -- 10 files changed, 51 deletions(-) diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h index 5148729055..3d94da0890 100644 --- a/core/rend/metal/metal_buffer.h +++ b/core/rend/metal/metal_buffer.h @@ -26,7 +26,6 @@ struct MetalBufferData ~MetalBufferData() { [buffer setPurgeableState:MTLPurgeableStateEmpty]; - [buffer release]; buffer = nil; } diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 4455d926ba..390055cc85 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -60,7 +60,6 @@ class MetalContext : public GraphicsContext static MetalContext* Instance() { return contextInstance; } private: #ifdef __OBJC__ - NSAutoreleasePool *pool; id device = MTLCreateSystemDefaultDevice(); id queue = nil; CAMetalLayer* layer; diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 5e3dc38c76..2d9a896927 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -25,7 +25,6 @@ MetalContext *MetalContext::contextInstance; bool MetalContext::init() { - pool = [[NSAutoreleasePool alloc] init]; GraphicsContext::instance = this; #if defined(USE_SDL) @@ -74,7 +73,6 @@ void MetalContext::term() { GraphicsContext::instance = nullptr; imguiDriver.reset(); - 
[pool release]; } MetalContext::MetalContext() { diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index d4c7f2b889..c845e84302 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -53,8 +53,6 @@ class MetalDriver final : public ImGuiDriver { commandEncoder = [context->commandBuffer renderCommandEncoderWithDescriptor:descriptor]; ImGui_ImplMetal_NewFrame(descriptor); - - [descriptor release]; } void renderDrawData(ImDrawData *drawData, bool gui_open) override { diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index 18e87dc501..b902edf046 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -43,8 +43,6 @@ ERROR_LOG(RENDERER, "Failed to create Blit Pipeline State: %s", [error localizedDescription]);; } - [descriptor release]; - blitPassPipeline = state; } @@ -96,11 +94,6 @@ ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); } - [descriptor release]; - if (vertexDesc) { - [vertexDesc release]; - } - modVolPipelines[hash(mode, cullMode, naomi2)] = state; } @@ -131,8 +124,6 @@ ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); } - [descriptor release]; - depthPassPipelines[hash(cullMode, naomi2)] = state; } @@ -192,8 +183,6 @@ ERROR_LOG(RENDERER, "Failed to create Render Pipeline State: %s", [[error localizedDescription] UTF8String]); } - [descriptor release]; - pipelines[hash(listType, sortTriangles, &pp, gpuPalette, dithering)] = state; } @@ -252,8 +241,6 @@ auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; - [descriptor release]; - modVolStencilStates[hash(mode, cullMode, naomi2)] = state; } @@ -265,8 +252,6 @@ auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; - [descriptor release]; - 
depthPassDepthStencilStates[hash(cullMode, naomi2)] = state; } @@ -318,8 +303,5 @@ auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; - [descriptor release]; - [stencilDescriptor release]; - depthStencilStates[hash(listType, sortTriangles, shadowed, &pp)] = state; } diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index 2e4b82dd05..3e2a697ce9 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -73,7 +73,6 @@ class MetalRenderer final : public Renderer INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", mainBuffer->bufferSize, newSize); [mainBuffer->buffer setPurgeableState: MTLPurgeableStateEmpty]; - [mainBuffer->buffer release]; mainBuffer = new MetalBufferData(newSize); } diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 8b2a687afc..fb858886fd 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -439,8 +439,6 @@ } bool MetalRenderer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; if (dithering) { @@ -469,13 +467,11 @@ if (frameBuffer != nil) { [frameBuffer setPurgeableState:MTLPurgeableStateEmpty]; - [frameBuffer release]; frameBuffer = nil; } if (depthBuffer != nil) { [depthBuffer setPurgeableState:MTLPurgeableStateEmpty]; - [depthBuffer release]; depthBuffer = nil; } @@ -486,7 +482,6 @@ [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; frameBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:desc]; - [desc release]; MTLTextureDescriptor *depthDesc = [[MTLTextureDescriptor alloc] init]; [depthDesc setPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; @@ -495,7 +490,6 
@@ [depthDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; depthBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDesc]; - [depthDesc release]; auto drawable = [MetalContext::Instance()->GetLayer() nextDrawable]; @@ -519,13 +513,8 @@ [descriptor setDepthAttachment:depthAttachmentDescriptor]; [descriptor setStencilAttachment:stencilAttachmentDescriptor]; - [depthAttachmentDescriptor release]; - [stencilAttachmentDescriptor release]; - id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - [descriptor release]; - [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; @@ -588,8 +577,6 @@ renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - [descriptor release]; - [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; [renderEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; @@ -597,7 +584,6 @@ [buffer presentDrawable:drawable]; [buffer commit]; - [pool release]; DEBUG_LOG(RENDERER, "Render command buffer released"); diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h index 1f9e470121..2fa46f0d3b 100644 --- a/core/rend/metal/metal_shaders.h +++ b/core/rend/metal/metal_shaders.h @@ -112,10 +112,6 @@ class MetalShaders { vertexShaders.clear(); fragmentShaders.clear(); - - [vertexShaderConstants release]; - [fragmentShaderConstants release]; - [modVolShaderConstants release]; } private: diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index f9a22fa72e..99cc6f1923 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -43,10 +43,6 @@ class MetalSamplers static const u32 TSP_Mask = 0x7ef00; void term() { - for (auto &[u, samp] : samplers) { - [samp release]; - } - samplers.clear(); } @@ -96,8 +92,6 @@ 
class MetalSamplers sampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:desc]; - [desc release]; - samplers.emplace(hash, sampler).first->second; } diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index cc480c0996..518200f0aa 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -69,7 +69,6 @@ auto device = MetalContext::Instance()->GetDevice(); texture = [device newTextureWithDescriptor:desc]; - [desc release]; MTLRegion region = { 0, 0, static_cast(width), static_cast(height) }; [texture replaceRegion:region mipmapLevel:0 withBytes:temp_tex_buffer bytesPerRow:bpp * width]; @@ -78,7 +77,6 @@ bool MetalTexture::Delete() { [texture setPurgeableState:MTLPurgeableStateEmpty]; - [texture release]; texture = nil; return true; From 9f6272b509d65ccd1195a4bf16d65b2161bf9f5e Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 20:20:54 -0400 Subject: [PATCH 04/48] Fix call Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index fb858886fd..1cee657fba 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -258,7 +258,7 @@ [encoder setFragmentTexture:texture atIndex:0]; // Texture sampler - [encoder setFragmentSamplerState:samplers.GetSampler(poly, listType == ListType_Punch_Through), 0]; + [encoder setFragmentSamplerState:samplers.GetSampler(poly, listType == ListType_Punch_Through) atIndex:0]; } if (poly.pcw.Texture || poly.isNaomi2()) From 5aa293bcea1558f056adc4ed26448c8ef215ec27 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 20:32:44 -0400 Subject: [PATCH 05/48] Fix some mistakes Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.mm | 2 +- core/rend/metal/metal_driver.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 2d9a896927..bbf8b11fbb 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -39,7 +39,7 @@ return false; } - layer = static_cast>(SDL_Metal_GetLayer(view)); + layer = static_cast(SDL_Metal_GetLayer(view)); #endif if (!device) { diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index c845e84302..d6c234ffe5 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -115,6 +115,6 @@ class MetalDriver final : public ImGuiDriver { bool frameRendered = false; id commandEncoder; - CAMetalDrawable *drawable; + id drawable; std::unordered_map textures; }; From 23b9555e04000735b5f1fe1867e3a60afdee367c Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 20:55:22 -0400 Subject: [PATCH 06/48] Fix texture imgui cast Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index d6c234ffe5..9e8bd51aaa 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -92,7 +92,7 @@ class MetalDriver final : public ImGuiDriver { texture.texture->tex_type = TextureType::_8888; texture.texture->UploadToGPU(width, height, data, false); - ImTextureID textureID = texture.texture->texture; + auto textureID = (ImTextureID)(intptr_t)(__bridge void*)texture.texture->texture; textures[name] = std::move(texture); From c6bdb54390e9a73deeca42b652e64444ec4381a1 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 20 May 2025 21:15:20 -0400 Subject: [PATCH 07/48] Include vector, rename conflicting types Signed-off-by: Isaac Marovitz --- core/hw/pvr/pvr_regs.h | 12 ++++++------ core/hw/pvr/ta_ctx.h | 4 ++-- core/rend/metal/metal_buffer.h | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index 
02faa6fabc..fa33d9838c 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -151,7 +151,7 @@ struct VO_BORDER_COL_type : RGBAColorTemplate }; }; -struct RGBColor : RGBAColorTemplate +struct RGB_Color : RGBAColorTemplate { union { struct @@ -165,7 +165,7 @@ struct RGBColor : RGBAColorTemplate }; }; -struct RGBAColor : RGBAColorTemplate +struct RGBA_Color : RGBAColorTemplate { union { struct @@ -531,11 +531,11 @@ union FOG_DENSITY_type #define SDRAM_ARB_CFG PvrReg(SDRAM_ARB_CFG_addr,u32) // RW Texture memory arbiter control #define SDRAM_CFG PvrReg(SDRAM_CFG_addr,u32) // RW Texture memory control -#define FOG_COL_RAM PvrReg(FOG_COL_RAM_addr, RGBColor) // RW Color for Look Up table Fog -#define FOG_COL_VERT PvrReg(FOG_COL_VERT_addr, RGBColor) // RW Color for vertex Fog +#define FOG_COL_RAM PvrReg(FOG_COL_RAM_addr, RGB_Color) // RW Color for Look Up table Fog +#define FOG_COL_VERT PvrReg(FOG_COL_VERT_addr, RGB_Color) // RW Color for vertex Fog #define FOG_DENSITY PvrReg(FOG_DENSITY_addr, FOG_DENSITY_type) // RW Fog scale value -#define FOG_CLAMP_MAX PvrReg(FOG_CLAMP_MAX_addr, RGBAColor) // RW Color clamping maximum value -#define FOG_CLAMP_MIN PvrReg(FOG_CLAMP_MIN_addr, RGBAColor) // RW Color clamping minimum value +#define FOG_CLAMP_MAX PvrReg(FOG_CLAMP_MAX_addr, RGBA_Color) // RW Color clamping maximum value +#define FOG_CLAMP_MIN PvrReg(FOG_CLAMP_MIN_addr, RGBA_Color) // RW Color clamping minimum value #define SPG_TRIGGER_POS PvrReg(SPG_TRIGGER_POS_addr,u32) // RW External trigger signal HV counter value #define SPG_HBLANK_INT PvrReg(SPG_HBLANK_INT_addr,SPG_HBLANK_INT_type) // RW H-blank interrupt control #define SPG_VBLANK_INT PvrReg(SPG_VBLANK_INT_addr,SPG_VBLANK_INT_type) // RW V-blank interrupt control diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index b12df6e1b4..6507d2ff9d 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -244,8 +244,8 @@ struct rend_context u32 framebufferWidth; u32 framebufferHeight; - RGBAColor 
fog_clamp_min; - RGBAColor fog_clamp_max; + RGBA_Color fog_clamp_min; + RGBA_Color fog_clamp_max; std::vector verts; std::vector idx; diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h index 3d94da0890..32bbc04114 100644 --- a/core/rend/metal/metal_buffer.h +++ b/core/rend/metal/metal_buffer.h @@ -19,6 +19,7 @@ #pragma once #include "types.h" #include +#include struct MetalBufferData { From 1100747f618d4b8f24e9d9c04f6c91849b4eed35 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 12:17:36 -0400 Subject: [PATCH 08/48] Update imgui metal backend + Fix texture handles Signed-off-by: Isaac Marovitz --- core/deps/imgui/backends/imgui_impl_metal.h | 3 +- core/deps/imgui/backends/imgui_impl_metal.mm | 146 ++++++++++--------- core/rend/metal/metal_buffer.mm | 1 - core/rend/metal/metal_context.h | 2 +- core/rend/metal/metal_context.mm | 2 + core/rend/metal/metal_driver.h | 2 +- core/rend/metal/metal_texture.h | 1 - core/rend/metal/metal_texture.mm | 3 +- 8 files changed, 82 insertions(+), 78 deletions(-) diff --git a/core/deps/imgui/backends/imgui_impl_metal.h b/core/deps/imgui/backends/imgui_impl_metal.h index 351c2eff73..d0debff0dc 100644 --- a/core/deps/imgui/backends/imgui_impl_metal.h +++ b/core/deps/imgui/backends/imgui_impl_metal.h @@ -13,6 +13,7 @@ // - Documentation https://dearimgui.com/docs (same as your local docs/ folder). 
// - Introduction, links and more at the top of imgui.cpp +#pragma once #include "imgui.h" // IMGUI_IMPL_API #ifndef IMGUI_DISABLE @@ -71,4 +72,4 @@ IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects(); //----------------------------------------------------------------------------- -#endif // #ifndef IMGUI_DISABLE +#endif // #ifndef IMGUI_DISABLE \ No newline at end of file diff --git a/core/deps/imgui/backends/imgui_impl_metal.mm b/core/deps/imgui/backends/imgui_impl_metal.mm index 5680dea9d4..2a476c093f 100644 --- a/core/deps/imgui/backends/imgui_impl_metal.mm +++ b/core/deps/imgui/backends/imgui_impl_metal.mm @@ -15,6 +15,8 @@ // CHANGELOG // (minor and older changes stripped away, please see git history for details) +// 2025-02-03: Metal: Crash fix. (#8367) +// 2024-01-08: Metal: Fixed memory leaks when using metal-cpp (#8276, #8166) or when using multiple contexts (#7419). // 2022-08-23: Metal: Update deprecated property 'sampleCount'->'rasterSampleCount'. // 2022-07-05: Metal: Add dispatch synchronization. // 2022-06-30: Metal: Use __bridge for ARC based systems. @@ -142,6 +144,7 @@ bool ImGui_ImplMetal_Init(id device) void ImGui_ImplMetal_Shutdown() { ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + IM_UNUSED(bd); IM_ASSERT(bd != nullptr && "No renderer backend to shutdown, or already shutdown?"); ImGui_ImplMetal_DestroyDeviceObjects(); ImGui_ImplMetal_DestroyBackendData(); @@ -156,15 +159,18 @@ void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor) { ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); IM_ASSERT(bd != nil && "Context or backend not initialized! 
Did you call ImGui_ImplMetal_Init()?"); +#ifdef IMGUI_IMPL_METAL_CPP + bd->SharedMetalContext.framebufferDescriptor = [[[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor]autorelease]; +#else bd->SharedMetalContext.framebufferDescriptor = [[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor]; - +#endif if (bd->SharedMetalContext.depthStencilState == nil) ImGui_ImplMetal_CreateDeviceObjects(bd->SharedMetalContext.device); } static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, id commandBuffer, - id commandEncoder, id renderPipelineState, - MetalBuffer* vertexBuffer, size_t vertexBufferOffset) + id commandEncoder, id renderPipelineState, + MetalBuffer* vertexBuffer, size_t vertexBufferOffset) { IM_UNUSED(commandBuffer); ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); @@ -175,14 +181,14 @@ static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, idDisplayPos (top left) to // draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin is typically (0,0) for single viewport apps. 
MTLViewport viewport = - { - .originX = 0.0, - .originY = 0.0, - .width = (double)(drawData->DisplaySize.x * drawData->FramebufferScale.x), - .height = (double)(drawData->DisplaySize.y * drawData->FramebufferScale.y), - .znear = 0.0, - .zfar = 1.0 - }; + { + .originX = 0.0, + .originY = 0.0, + .width = (double)(drawData->DisplaySize.x * drawData->FramebufferScale.x), + .height = (double)(drawData->DisplaySize.y * drawData->FramebufferScale.y), + .znear = 0.0, + .zfar = 1.0 + }; [commandEncoder setViewport:viewport]; float L = drawData->DisplayPos.x; @@ -192,12 +198,12 @@ static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, id c // Apply scissor/clipping rectangle MTLScissorRect scissorRect = - { - .x = NSUInteger(clip_min.x), - .y = NSUInteger(clip_min.y), - .width = NSUInteger(clip_max.x - clip_min.x), - .height = NSUInteger(clip_max.y - clip_min.y) - }; + { + .x = NSUInteger(clip_min.x), + .y = NSUInteger(clip_min.y), + .width = NSUInteger(clip_max.x - clip_min.x), + .height = NSUInteger(clip_max.y - clip_min.y) + }; [commandEncoder setScissorRect:scissorRect]; // Bind texture, Draw @@ -306,25 +312,21 @@ void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData, id c indexBufferOffset += (size_t)draw_list->IdxBuffer.Size * sizeof(ImDrawIdx); } + MetalContext* sharedMetalContext = bd->SharedMetalContext; [commandBuffer addCompletedHandler:^(id) { dispatch_async(dispatch_get_main_queue(), ^{ - ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); - if (bd != nullptr) + @synchronized(sharedMetalContext.bufferCache) { - @synchronized(bd->SharedMetalContext.bufferCache) - { - [bd->SharedMetalContext.bufferCache addObject:vertexBuffer]; - [bd->SharedMetalContext.bufferCache addObject:indexBuffer]; - } + [sharedMetalContext.bufferCache addObject:vertexBuffer]; + [sharedMetalContext.bufferCache addObject:indexBuffer]; } }); }]; } bool ImGui_ImplMetal_CreateFontsTexture(id device) -{ - ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); +{ 
ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); ImGuiIO& io = ImGui::GetIO(); // We are retrieving and uploading the font atlas as a 4-channels RGBA texture here. @@ -367,8 +369,10 @@ bool ImGui_ImplMetal_CreateDeviceObjects(id device) depthStencilDescriptor.depthWriteEnabled = NO; depthStencilDescriptor.depthCompareFunction = MTLCompareFunctionAlways; bd->SharedMetalContext.depthStencilState = [device newDepthStencilStateWithDescriptor:depthStencilDescriptor]; +#ifdef IMGUI_IMPL_METAL_CPP + [depthStencilDescriptor release]; +#endif ImGui_ImplMetal_CreateFontsTexture(device); - return true; } @@ -434,9 +438,9 @@ - (BOOL)isEqual:(id)object if (![other isKindOfClass:[FramebufferDescriptor class]]) return NO; return other.sampleCount == self.sampleCount && - other.colorPixelFormat == self.colorPixelFormat && - other.depthPixelFormat == self.depthPixelFormat && - other.stencilPixelFormat == self.stencilPixelFormat; + other.colorPixelFormat == self.colorPixelFormat && + other.depthPixelFormat == self.depthPixelFormat && + other.stencilPixelFormat == self.stencilPixelFormat; } @end @@ -497,40 +501,40 @@ - (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id\n" - "using namespace metal;\n" - "\n" - "struct Uniforms {\n" - " float4x4 projectionMatrix;\n" - "};\n" - "\n" - "struct VertexIn {\n" - " float2 position [[attribute(0)]];\n" - " float2 texCoords [[attribute(1)]];\n" - " uchar4 color [[attribute(2)]];\n" - "};\n" - "\n" - "struct VertexOut {\n" - " float4 position [[position]];\n" - " float2 texCoords;\n" - " float4 color;\n" - "};\n" - "\n" - "vertex VertexOut vertex_main(VertexIn in [[stage_in]],\n" - " constant Uniforms &uniforms [[buffer(1)]]) {\n" - " VertexOut out;\n" - " out.position = uniforms.projectionMatrix * float4(in.position, 0, 1);\n" - " out.texCoords = in.texCoords;\n" - " out.color = float4(in.color) / float4(255.0);\n" - " return out;\n" - "}\n" - "\n" - "fragment half4 fragment_main(VertexOut in [[stage_in]],\n" 
- " texture2d texture [[texture(0)]]) {\n" - " constexpr sampler linearSampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);\n" - " half4 texColor = texture.sample(linearSampler, in.texCoords);\n" - " return half4(in.color) * texColor;\n" - "}\n"; + "#include \n" + "using namespace metal;\n" + "\n" + "struct Uniforms {\n" + " float4x4 projectionMatrix;\n" + "};\n" + "\n" + "struct VertexIn {\n" + " float2 position [[attribute(0)]];\n" + " float2 texCoords [[attribute(1)]];\n" + " uchar4 color [[attribute(2)]];\n" + "};\n" + "\n" + "struct VertexOut {\n" + " float4 position [[position]];\n" + " float2 texCoords;\n" + " float4 color;\n" + "};\n" + "\n" + "vertex VertexOut vertex_main(VertexIn in [[stage_in]],\n" + " constant Uniforms &uniforms [[buffer(1)]]) {\n" + " VertexOut out;\n" + " out.position = uniforms.projectionMatrix * float4(in.position, 0, 1);\n" + " out.texCoords = in.texCoords;\n" + " out.color = float4(in.color) / float4(255.0);\n" + " return out;\n" + "}\n" + "\n" + "fragment half4 fragment_main(VertexOut in [[stage_in]],\n" + " texture2d texture [[texture(0)]]) {\n" + " constexpr sampler linearSampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);\n" + " half4 texColor = texture.sample(linearSampler, in.texCoords);\n" + " return half4(in.color) * texColor;\n" + "}\n"; id library = [device newLibraryWithSource:shaderSource options:nil error:&error]; if (library == nil) @@ -589,4 +593,4 @@ - (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id device = MTLCreateSystemDefaultDevice(); + id device = nil; id queue = nil; CAMetalLayer* layer; #endif diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index bbf8b11fbb..33ac26b892 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -42,6 +42,8 @@ layer = static_cast(SDL_Metal_GetLayer(view)); #endif + device = MTLCreateSystemDefaultDevice(); + if (!device) 
{ term(); NOTICE_LOG(RENDERER, "Metal Device is null."); diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index 9e8bd51aaa..33f01cd3ed 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -82,7 +82,7 @@ class MetalDriver final : public ImGuiDriver { ImTextureID getTexture(const std::string &name) override { auto it = textures.find(name); if (it != textures.end()) - return &it->second.texture; + return it->second.texture->texture; return ImTextureID{}; } diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index 99cc6f1923..c5dd36b57b 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -28,7 +28,6 @@ class MetalTexture final : public BaseTextureCacheData MetalTexture(TSP tsp = {}, TCW tcw = {}) : BaseTextureCacheData(tsp, tcw) {} id texture; - std::string GetId() override { return std::to_string([texture gpuResourceID]._impl); } void UploadToGPU(int width, int height, const u8 *temp_tex_buffer, bool mipmapped, bool mipmapsIncluded = false) override; bool Delete() override; diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index 518200f0aa..2c2912dc75 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -70,8 +70,7 @@ texture = [device newTextureWithDescriptor:desc]; - MTLRegion region = { 0, 0, static_cast(width), static_cast(height) }; - [texture replaceRegion:region mipmapLevel:0 withBytes:temp_tex_buffer bytesPerRow:bpp * width]; + [texture replaceRegion:MTLRegionMake2D(0, 0, width, height) mipmapLevel:0 withBytes:temp_tex_buffer bytesPerRow:bpp * width]; } bool MetalTexture::Delete() From f112c3e3c70e419980837e9afd8019364d3429ed Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 13:01:24 -0400 Subject: [PATCH 09/48] Get Metal window size (not scaled properly) Signed-off-by: Isaac Marovitz --- core/sdl/sdl.cpp | 5 +++++ core/wsi/switcher.cpp | 1 - 2 files changed, 
5 insertions(+), 1 deletion(-) diff --git a/core/sdl/sdl.cpp b/core/sdl/sdl.cpp index a43fa8b5a0..37334750e9 100644 --- a/core/sdl/sdl.cpp +++ b/core/sdl/sdl.cpp @@ -399,6 +399,11 @@ void input_sdl_handle() if (windowFlags & SDL_WINDOW_OPENGL) SDL_GL_GetDrawableSize(window, &settings.display.width, &settings.display.height); else +#endif +#ifdef USE_METAL + if (windowFlags & SDL_WINDOW_METAL) + SDL_Metal_GetDrawableSize(window, &settings.display.width, &settings.display.height); + else #endif SDL_GetWindowSize(window, &settings.display.width, &settings.display.height); GraphicsContext::Instance()->resize(); diff --git a/core/wsi/switcher.cpp b/core/wsi/switcher.cpp index de8e65fce2..075a10a330 100644 --- a/core/wsi/switcher.cpp +++ b/core/wsi/switcher.cpp @@ -77,7 +77,6 @@ void initRenderApi(void *window, void *display) if (isMetal(config::RendererType)) { theMetalContext.setWindow(window, display); - if (theMetalContext.init()) return; // Fall back to OpenGL From b689dbca5d5b64c25b50a524af15f1ad557cd77f Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 13:34:33 -0400 Subject: [PATCH 10/48] Fix explicit cast Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index 33f01cd3ed..ca596179ce 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -82,7 +82,7 @@ class MetalDriver final : public ImGuiDriver { ImTextureID getTexture(const std::string &name) override { auto it = textures.find(name); if (it != textures.end()) - return it->second.texture->texture; + return (ImTextureID)(intptr_t)(__bridge void*)it->second.texture->texture; return ImTextureID{}; } From 8e8ebc1766ce2f4d7c974566702bac8d5fb8c66a Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 13:49:49 -0400 Subject: [PATCH 11/48] Hack to fix metal_context.h Signed-off-by: Isaac Marovitz --- 
core/rend/metal/metal_context.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 554966dcef..188e625b18 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -39,6 +39,12 @@ class MetalContext : public GraphicsContext CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } id commandBuffer = nil; +// Hack to make sure everything lines up when in pure C++ +#else + void* GetDevice() const { return device; } + void* GetLayer() const { return layer; } + void* GetQueue() const { return queue; } + void* commandBuffer = nullptr; #endif void resize() override; void Present(); @@ -63,6 +69,10 @@ class MetalContext : public GraphicsContext id device = nil; id queue = nil; CAMetalLayer* layer; +#else + void* device = nullptr; + void* queue = nullptr; + void* layer = nullptr; #endif static MetalContext* contextInstance; }; \ No newline at end of file From a3db31a173ea89afdcfa9016d2b4fe6d20498536 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 14:18:38 -0400 Subject: [PATCH 12/48] Clamp imgui sampler to border Signed-off-by: Isaac Marovitz --- core/deps/imgui/backends/imgui_impl_metal.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/deps/imgui/backends/imgui_impl_metal.mm b/core/deps/imgui/backends/imgui_impl_metal.mm index 2a476c093f..810c696e9a 100644 --- a/core/deps/imgui/backends/imgui_impl_metal.mm +++ b/core/deps/imgui/backends/imgui_impl_metal.mm @@ -531,7 +531,7 @@ - (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id texture [[texture(0)]]) {\n" - " constexpr sampler linearSampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);\n" + " constexpr sampler linearSampler(coord::normalized, address::clamp_to_border, min_filter::linear, mag_filter::linear, mip_filter::linear);\n" " half4 texColor = texture.sample(linearSampler, 
in.texCoords);\n" " return half4(in.color) * texColor;\n" "}\n"; From 5fb1d7d0e1db76648c62d72508bc97848251031d Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 15:19:27 -0400 Subject: [PATCH 13/48] Fix iOS SDL include Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.mm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 33ac26b892..19ec1e81ba 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -19,7 +19,9 @@ #include "metal_context.h" #include "metal_driver.h" +#ifdef USE_SDL #include "sdl/sdl.h" +#endif #include "ui/imgui_driver.h" MetalContext *MetalContext::contextInstance; @@ -27,7 +29,7 @@ bool MetalContext::init() { GraphicsContext::instance = this; -#if defined(USE_SDL) +#ifdef USE_SDL if (!sdl_recreate_window(SDL_WINDOW_METAL)) return false; From cce33de4a27b98b05bda888fbe3ebd38cc184a01 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 16:01:28 -0400 Subject: [PATCH 14/48] Rendering again Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.h | 2 - core/rend/metal/metal_context.mm | 1 - core/rend/metal/metal_driver.h | 15 ++-- core/rend/metal/metal_renderer.mm | 122 +++++++++++++++--------------- 4 files changed, 67 insertions(+), 73 deletions(-) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 188e625b18..126e8780a9 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -38,13 +38,11 @@ class MetalContext : public GraphicsContext id GetDevice() const { return device; } CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } - id commandBuffer = nil; // Hack to make sure everything lines up when in pure C++ #else void* GetDevice() const { return device; } void* GetLayer() const { return layer; } void* GetQueue() const { return queue; } - void* commandBuffer = nullptr; 
#endif void resize() override; void Present(); diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 19ec1e81ba..96c81ff47e 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -54,7 +54,6 @@ [layer setDevice:device]; queue = [device newCommandQueue]; - commandBuffer = [queue commandBuffer]; NOTICE_LOG(RENDERER, "Created Metal view."); diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index ca596179ce..33c236aee5 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -39,8 +39,7 @@ class MetalDriver final : public ImGuiDriver { void newFrame() override { MetalContext *context = MetalContext::Instance(); drawable = [context->GetLayer() nextDrawable]; - - MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init]; + descriptor = [[MTLRenderPassDescriptor alloc] init]; [descriptor setDefaultRasterSampleCount:1]; @@ -50,25 +49,21 @@ class MetalDriver final : public ImGuiDriver { [color setLoadAction:MTLLoadActionClear]; [color setStoreAction:MTLStoreActionStore]; - commandEncoder = [context->commandBuffer renderCommandEncoderWithDescriptor:descriptor]; - ImGui_ImplMetal_NewFrame(descriptor); } void renderDrawData(ImDrawData *drawData, bool gui_open) override { MetalContext *context = MetalContext::Instance(); - id buffer = context->commandBuffer; + id buffer = [context->GetQueue() commandBuffer]; + id commandEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; ImGui_ImplMetal_RenderDrawData(drawData, buffer, commandEncoder); [commandEncoder endEncoding]; + commandEncoder = nil; [buffer presentDrawable:drawable]; [buffer commit]; - commandEncoder = nil; - - context->commandBuffer = [context->GetQueue() commandBuffer]; - if (gui_open) frameRendered = true; } @@ -114,7 +109,7 @@ class MetalDriver final : public ImGuiDriver { }; bool frameRendered = false; - id commandEncoder; + MTLRenderPassDescriptor* descriptor; id 
drawable; std::unordered_map textures; }; diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 1cee657fba..8f3f3d6c2e 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -493,7 +493,7 @@ auto drawable = [MetalContext::Instance()->GetLayer() nextDrawable]; - id buffer = MetalContext::Instance()->commandBuffer; + id buffer = [MetalContext::Instance()->GetQueue() commandBuffer]; MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init]; auto color = [descriptor colorAttachments][0]; [color setTexture:frameBuffer]; @@ -513,60 +513,62 @@ [descriptor setDepthAttachment:depthAttachmentDescriptor]; [descriptor setStencilAttachment:stencilAttachmentDescriptor]; - id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - - [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; - [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; - - // Fog sampler - TSP fogTsp = {}; - fogTsp.FilterMode = 1; - fogTsp.ClampU = 1; - fogTsp.ClampV = 1; - [renderEncoder setFragmentSamplerState:samplers.GetSampler(fogTsp) atIndex:2]; - - // Palette sampler - TSP palTsp = {}; - palTsp.FilterMode = 0; - palTsp.ClampU = 1; - palTsp.ClampV = 1; - [renderEncoder setFragmentSamplerState:samplers.GetSampler(palTsp) atIndex:3]; - - // Upload vertex and index buffers - VertexShaderUniforms vtxUniforms {}; - vtxUniforms.ndcMat = matrices.GetNormalMatrix(); - - UploadMainBuffer(vtxUniforms, fragUniforms); - - [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; - [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; - [renderEncoder setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; - - RenderPass previous_pass {}; - for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { - const RenderPass& current_pass = pvrrc.render_passes[render_pass]; - - 
DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, - current_pass.op_count - previous_pass.op_count, - current_pass.pt_count - previous_pass.pt_count, - current_pass.tr_count - previous_pass.tr_count, - current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); - DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); - DrawList(renderEncoder, ListType_Punch_Through, false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); - DrawModVols(renderEncoder, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - if (current_pass.autosort) { - if (!config::PerStripSorting) - DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); - else - DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); - } else { - // TODO: This breaking? 
- // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + @autoreleasepool { + id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + + [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; + [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; + + // Fog sampler + TSP fogTsp = {}; + fogTsp.FilterMode = 1; + fogTsp.ClampU = 1; + fogTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers.GetSampler(fogTsp) atIndex:2]; + + // Palette sampler + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers.GetSampler(palTsp) atIndex:3]; + + // Upload vertex and index buffers + VertexShaderUniforms vtxUniforms {}; + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); + + UploadMainBuffer(vtxUniforms, fragUniforms); + + [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; + [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; + [renderEncoder setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; + + RenderPass previous_pass {}; + for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { + const RenderPass& current_pass = pvrrc.render_passes[render_pass]; + + DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, + current_pass.op_count - previous_pass.op_count, + current_pass.pt_count - previous_pass.pt_count, + current_pass.tr_count - previous_pass.tr_count, + current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); + DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(renderEncoder, ListType_Punch_Through, false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawModVols(renderEncoder, 
previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); + if (current_pass.autosort) { + if (!config::PerStripSorting) + DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); + else + DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } else { + // TODO: This breaking? + // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } + previous_pass = current_pass; } - previous_pass = current_pass; - } - [renderEncoder endEncoding]; + [renderEncoder endEncoding]; + } // Blit to framebuffer descriptor = [[MTLRenderPassDescriptor alloc] init]; @@ -575,19 +577,19 @@ [color setLoadAction:MTLLoadActionClear]; [color setStoreAction:MTLStoreActionStore]; - renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + @autoreleasepool { + id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; - [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; - [renderEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; - [renderEncoder endEncoding]; + [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; + [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; + [renderEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + [renderEncoder endEncoding]; + } [buffer presentDrawable:drawable]; [buffer commit]; DEBUG_LOG(RENDERER, "Render command buffer released"); - - MetalContext::Instance()->commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; return !pvrrc.isRTT; } From a65cf12a88bbdf668779667d1a747fc7849bc9b9 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 16:19:11 -0400 Subject: 
[PATCH 15/48] Fix missing NSString -> char* Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_pipeline.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index b902edf046..ea6e0c1adb 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -40,7 +40,7 @@ auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; if (state == nil) { - ERROR_LOG(RENDERER, "Failed to create Blit Pipeline State: %s", [error localizedDescription]);; + ERROR_LOG(RENDERER, "Failed to create Blit Pipeline State: %s", [[error localizedDescription] UTF8String]);; } blitPassPipeline = state; From 640c206b48262e5386e0df5b90804970e4c38000 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 21 May 2025 16:50:42 -0400 Subject: [PATCH 16/48] Gouraud Interpolation Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 50 +++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 9b890fb6d1..4ccb3778f8 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -30,6 +30,9 @@ constant bool pp_gouraud [[function_constant(0)]]; constant bool div_pos_z [[function_constant(1)]]; +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flag = !is_flat; + struct VertexShaderUniforms { float4x4 ndc_mat; @@ -45,9 +48,10 @@ struct VertexOut { - // TODO: Interpolation mode - float4 vtx_base; - float4 vtx_offs; + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flag)]]; + float4 vtx_offs [[function_constant(is_not_flag)]]; float3 vtx_uv; float4 position [[position]]; }; @@ -62,13 +66,23 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant 
VertexShaderUniforms } VertexOut out = {}; - out.vtx_base = in.in_base; - out.vtx_offs = in.in_offs; + if (is_flat) { + out.flat_vtx_base = in.in_base; + out.flat_vtx_offs = in.in_offs; + } else { + out.vtx_base = in.in_base; + out.vtx_offs = in.in_offs; + } out.vtx_uv = float3(in.in_uv, vpos.z); if (pp_gouraud && !div_pos_z) { - out.vtx_base *= vpos.z; - out.vtx_offs *= vpos.z; + if (is_flat) { + out.flat_vtx_base *= vpos.z; + out.flat_vtx_offs *= vpos.z; + } else { + out.vtx_base *= vpos.z; + out.vtx_offs *= vpos.z; + } } if (!div_pos_z) { @@ -107,6 +121,8 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms constant bool has_fog_table = pp_fog_ctrl != 2; constant bool has_palette = pp_palette != 0; +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flag = !is_flat; struct FragmentShaderUniforms { @@ -128,9 +144,10 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms struct VertexOut { - // TODO: Interpolation mode - float4 vtx_base; - float4 vtx_offs; + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flag)]]; + float4 vtx_offs [[function_constant(is_not_flag)]]; float3 vtx_uv; float4 position [[position]]; }; @@ -228,8 +245,16 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], constant FragmentShaderU discard_fragment(); } - float4 color = in.vtx_base; - float4 offset = in.vtx_offs; + float4 color; + float4 offset; + + if (is_flat) { + color = in.flat_vtx_base; + float4 offset = in.flat_vtx_offs; + } else { + color = in.vtx_base; + float4 offset = in.vtx_offs; + } if (pp_gouraud && !div_pos_z) { color /= in.vtx_uv.z; @@ -446,7 +471,6 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], } )"; -// TODO: Handle gouraud interpolation // TODO: N2 Shaders MetalShaders::MetalShaders() { From 28dc00fb7e830f17380794264dd12bc78cb21957 Mon Sep 17 00:00:00 2001 From: 
Isaac Marovitz Date: Thu, 22 May 2025 20:18:58 -0400 Subject: [PATCH 17/48] Fix ModVolShader function constant Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 4ccb3778f8..f093ebaba9 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -405,7 +405,7 @@ fragment float4 fs_main(VertexOut in [[stage_in]], using namespace metal; -constant bool div_pos_z [[function_constant(1)]]; +constant bool div_pos_z [[function_constant(0)]]; struct VertexShaderUniforms { From cd46d183683eac4ca971da16c1ac78efd6da044e Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Thu, 22 May 2025 20:27:48 -0400 Subject: [PATCH 18/48] Fix offset Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index f093ebaba9..a5374f5ada 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -250,10 +250,10 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], constant FragmentShaderU if (is_flat) { color = in.flat_vtx_base; - float4 offset = in.flat_vtx_offs; + offset = in.flat_vtx_offs; } else { color = in.vtx_base; - float4 offset = in.vtx_offs; + offset = in.vtx_offs; } if (pp_gouraud && !div_pos_z) { From e835e2e9d97b5b82801f7755854111c72c3a58a4 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 27 May 2025 14:39:07 -0400 Subject: [PATCH 19/48] Fix missing symbol Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_buffer.mm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/rend/metal/metal_buffer.mm b/core/rend/metal/metal_buffer.mm index 3cd72da832..34578fd17e 100644 --- a/core/rend/metal/metal_buffer.mm +++ b/core/rend/metal/metal_buffer.mm @@ -25,3 +25,5 @@ buffer = [device newBufferWithLength:size 
options:MTLResourceStorageModeShared]; } + +BufferPacker::BufferPacker() {} From ceee78b7c67636604cfc63f524dafe43973f3499 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 17:58:29 -0400 Subject: [PATCH 20/48] Resolve buffer packer symbol conflict Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_buffer.h | 4 ++-- core/rend/metal/metal_buffer.mm | 2 +- core/rend/metal/metal_renderer.mm | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h index 32bbc04114..854d92cd3d 100644 --- a/core/rend/metal/metal_buffer.h +++ b/core/rend/metal/metal_buffer.h @@ -65,10 +65,10 @@ struct MetalBufferData u64 bufferSize; }; -class BufferPacker +class MetalBufferPacker { public: - BufferPacker(); + MetalBufferPacker(); u64 addUniform(const void *p, size_t size) { return add(p, size); diff --git a/core/rend/metal/metal_buffer.mm b/core/rend/metal/metal_buffer.mm index 34578fd17e..31ea2ec9f6 100644 --- a/core/rend/metal/metal_buffer.mm +++ b/core/rend/metal/metal_buffer.mm @@ -26,4 +26,4 @@ buffer = [device newBufferWithLength:size options:MTLResourceStorageModeShared]; } -BufferPacker::BufferPacker() {} +MetalBufferPacker::MetalBufferPacker() {} diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 8f3f3d6c2e..c9cf7cdc83 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -414,7 +414,7 @@ } void MetalRenderer::UploadMainBuffer(const VertexShaderUniforms &vertexUniforms, const FragmentShaderUniforms &fragmentUniforms) { - BufferPacker packer; + MetalBufferPacker packer; // Vertex packer.add(pvrrc.verts.data(), pvrrc.verts.size() * sizeof(decltype(*pvrrc.verts.data()))); From 49a8b404e10f803483551d75b0d0aa277217ac57 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 18:47:34 -0400 Subject: [PATCH 21/48] Fix push constant alignment Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_buffer.h 
| 10 +++++----- core/rend/metal/metal_renderer.mm | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h index 854d92cd3d..2426136b15 100644 --- a/core/rend/metal/metal_buffer.h +++ b/core/rend/metal/metal_buffer.h @@ -105,14 +105,14 @@ class MetalBufferPacker return offset; } -private: - std::vector chunks; - std::vector chunkSizes; - u64 offset = 0; - static inline u32 align(u64 offset, u32 alignment) { u32 pad = (u32)(offset & (alignment - 1)); return pad == 0 ? 0 : alignment - pad; } + +private: + std::vector chunks; + std::vector chunkSizes; + u64 offset = 0; }; \ No newline at end of file diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index c9cf7cdc83..3e0aff2257 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -237,7 +237,7 @@ palette_index }; - [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) atIndex:1]; + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; } bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; @@ -345,7 +345,7 @@ id depth_state; const std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f }; - [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) atIndex:1]; + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; for (int cmv = 0; cmv < count; cmv++) { ModifierVolumeParam& param = params[cmv]; From 8b875d6a1ca2fb5fea7eff30444201133cf304bd Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 18:54:09 -0400 Subject: [PATCH 22/48] Fix div_pos_z typo Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index a5374f5ada..b6bf66f816 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -208,7 +208,7 @@ float4 palette_pixel_bilinear(texture2d texture, sampler texture_sampler, texture2d palette, sampler palette_sampler, float3 coords, constant PushBlock& push_constants) { - if (div_pos_z) { + if (!div_pos_z) { coords.xy /= coords.z; } From d4c9045e58e389f96acaa3d390fdf477733bcd6c Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 19:26:14 -0400 Subject: [PATCH 23/48] Stop resources from becoming corrupted in flight Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.mm | 48 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 3e0aff2257..3331633423 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -274,12 +274,11 @@ MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? 
MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; - [encoder drawIndexedPrimitives: primitive - indexCount: count - indexType: MTLIndexTypeUInt32 - indexBuffer: curMainBuffer - indexBufferOffset: offsets.indexOffset + first * sizeof(u32) - instanceCount: 1]; + [encoder drawIndexedPrimitives:primitive + indexCount:count + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + first * sizeof(u32)]; } void MetalRenderer::DrawSorted(id encoder, const std::vector &polys, u32 first, u32 last, bool multipass) @@ -306,9 +305,9 @@ SetTileClip(encoder, polyParam.tileclip, scissorRect); [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle indexCount:param.count - indexType:MTLIndexTypeUInt32 indexBuffer:curMainBuffer - indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32) - instanceCount:1]; + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32)]; } } @@ -378,8 +377,7 @@ [encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:param.first * 3 - vertexCount:param.count * 3 - instanceCount:1]; + vertexCount:param.count * 3]; if (mv_mode == 1 || mv_mode == 2) { @@ -391,8 +389,7 @@ [encoder setStencilReferenceValue:1]; [encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart: mod_base * 3 - vertexCount: (param.first + param.count - mod_base) * 3 - instanceCount: 1]; + vertexCount: (param.first + param.count - mod_base) * 3]; mod_base = -1; } } @@ -403,12 +400,11 @@ [encoder setRenderPipelineState:state]; [encoder setDepthStencilState:depth_state]; [encoder setStencilReferenceValue:0x81]; - [encoder drawIndexedPrimitives: MTLPrimitiveTypeTriangleStrip - indexCount: 4 - indexType: MTLIndexTypeUInt32 - indexBuffer: curMainBuffer - indexBufferOffset: offsets.indexOffset - instanceCount: 1]; + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangleStrip + indexCount:4 + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + 
indexBufferOffset:offsets.indexOffset]; [encoder popDebugGroup]; } @@ -447,17 +443,17 @@ case 0: // 0555 KRGB 16 bit case 3: // 1555 ARGB 16 bit fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = 31.f; - fragUniforms.ditherColorMax[3] = 255.f; - break; + fragUniforms.ditherColorMax[3] = 255.f; + break; case 1: // 565 RGB 16 bit fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[2] = 31.f; - fragUniforms.ditherColorMax[1] = 63.f; - fragUniforms.ditherColorMax[3] = 255.f; - break; + fragUniforms.ditherColorMax[1] = 63.f; + fragUniforms.ditherColorMax[3] = 255.f; + break; case 2: // 4444 ARGB 16 bit fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = fragUniforms.ditherColorMax[3] = 15.f; - break; + break; default: break; } @@ -588,6 +584,8 @@ [buffer presentDrawable:drawable]; [buffer commit]; + // TODO: Properly handle wait/vsync/buffering! + [buffer waitUntilCompleted]; DEBUG_LOG(RENDERER, "Render command buffer released"); return !pvrrc.isRTT; From 86b8363dcb05b0f407f6cf5690e98f56b0687675 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 21:46:28 -0400 Subject: [PATCH 24/48] Fix texture memory usage Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.h | 2 + core/rend/metal/metal_renderer.mm | 73 ++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index 3e2a697ce9..229588783b 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -107,6 +107,7 @@ class MetalRenderer final : public Renderer void CheckFogTexture(); void CheckPaletteTexture(); + void WaitIdle(); struct { u64 indexOffset = 0; @@ -131,6 +132,7 @@ class MetalRenderer final : public Renderer id depthBuffer = nil; id curMainBuffer = nil; + id commandBuffer = nil; MetalBufferData* mainBuffer; MetalPipelineManager pipelineManager 
= MetalPipelineManager(this); MetalShaders shaders; diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 3331633423..71ce3edf23 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -39,6 +39,7 @@ } void MetalRenderer::Term() { + WaitIdle(); pipelineManager.term(); shaders.term(); samplers.term(); @@ -133,6 +134,11 @@ paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false); } +void MetalRenderer::WaitIdle() { + [commandBuffer waitUntilCompleted]; + commandBuffer = nil; +} + TileClipping MetalRenderer::SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect) { int rect[4] = {}; TileClipping clipMode = GetTileClip(val, matrices.GetViewportMatrix(), rect); @@ -435,6 +441,15 @@ } bool MetalRenderer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { + matrices.CalcMatrices(&pvrrc); + u32 origWidth = pvrrc.getFramebufferWidth(); + u32 origHeight = pvrrc.getFramebufferHeight(); + u32 upscaledWidth = origWidth; + u32 upscaledHeight = origHeight; + u32 widthPow2; + u32 heightPow2; + getRenderToTextureDimensions(upscaledWidth, upscaledHeight, widthPow2, heightPow2); + FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; if (dithering) { @@ -461,35 +476,41 @@ currentScissor = MTLScissorRect {}; - if (frameBuffer != nil) { - [frameBuffer setPurgeableState:MTLPurgeableStateEmpty]; - frameBuffer = nil; - } + if (!frameBuffer || widthPow2 > frameBuffer.width || heightPow2 > frameBuffer.height) { + if (frameBuffer) { + WaitIdle(); + [frameBuffer setPurgeableState:MTLPurgeableStateEmpty]; + frameBuffer = nil; + } - if (depthBuffer != nil) { - [depthBuffer setPurgeableState:MTLPurgeableStateEmpty]; - depthBuffer = nil; - } + MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; + [desc setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [desc 
setWidth:widthPow2]; + [desc setHeight:heightPow2]; + [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; - MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; - [desc setPixelFormat:MTLPixelFormatBGRA8Unorm]; - [desc setWidth:pvrrc.framebufferWidth]; - [desc setHeight:pvrrc.framebufferHeight]; - [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + frameBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:desc]; + } - frameBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:desc]; + if (!depthBuffer || widthPow2 > depthBuffer.width || heightPow2 > depthBuffer.height) { + if (depthBuffer) { + WaitIdle(); + [depthBuffer setPurgeableState:MTLPurgeableStateEmpty]; + depthBuffer = nil; + } - MTLTextureDescriptor *depthDesc = [[MTLTextureDescriptor alloc] init]; - [depthDesc setPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; - [depthDesc setWidth:pvrrc.framebufferWidth]; - [depthDesc setHeight:pvrrc.framebufferHeight]; - [depthDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + MTLTextureDescriptor *depthDesc = [[MTLTextureDescriptor alloc] init]; + [depthDesc setPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [depthDesc setWidth:widthPow2]; + [depthDesc setHeight:heightPow2]; + [depthDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; - depthBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDesc]; + depthBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDesc]; + } auto drawable = [MetalContext::Instance()->GetLayer() nextDrawable]; - id buffer = [MetalContext::Instance()->GetQueue() commandBuffer]; + commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init]; auto color = [descriptor colorAttachments][0]; [color setTexture:frameBuffer]; @@ -510,7 +531,7 @@ [descriptor 
setStencilAttachment:stencilAttachmentDescriptor]; @autoreleasepool { - id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:descriptor]; [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; @@ -574,7 +595,7 @@ [color setStoreAction:MTLStoreActionStore]; @autoreleasepool { - id renderEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:descriptor]; [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; @@ -582,10 +603,10 @@ [renderEncoder endEncoding]; } - [buffer presentDrawable:drawable]; - [buffer commit]; + [commandBuffer presentDrawable:drawable]; + [commandBuffer commit]; // TODO: Properly handle wait/vsync/buffering! - [buffer waitUntilCompleted]; + WaitIdle(); DEBUG_LOG(RENDERER, "Render command buffer released"); return !pvrrc.isRTT; From e9e6050ba391433813dfd59a28b7e436c3eef598 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 21:52:07 -0400 Subject: [PATCH 25/48] Set first provoking vertex on Metal Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.mm | 3 +++ core/rend/sorter.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 71ce3edf23..e68c55f172 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -24,6 +24,7 @@ #include "hw/aica/dsp.h" #include "hw/pvr/ta.h" #include "hw/pvr/pvr_mem.h" +#include "rend/sorter.h" bool MetalRenderer::Init() { @@ -550,6 +551,8 @@ palTsp.ClampV = 1; [renderEncoder setFragmentSamplerState:samplers.GetSampler(palTsp) atIndex:3]; + setFirstProvokingVertex(pvrrc); + // Upload vertex and index buffers VertexShaderUniforms 
vtxUniforms {}; vtxUniforms.ndcMat = matrices.GetNormalMatrix(); diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp index 787fba0f14..bbec00370f 100644 --- a/core/rend/sorter.cpp +++ b/core/rend/sorter.cpp @@ -18,7 +18,7 @@ #include -// Vulkan and DirectX use the color values of the first vertex for flat shaded triangle strips. +// Vulkan, DirectX, and Metal use the color values of the first vertex for flat shaded triangle strips. // On Dreamcast the last vertex is the provoking one so we must copy it onto the first. void setFirstProvokingVertex(rend_context& rendContext) { From cc484d1c1d896990ff02013ca52fb0f14f54eeb9 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 22:10:43 -0400 Subject: [PATCH 26/48] Make metal_context hack better Signed-off-by: Isaac Marovitz --- CMakeLists.txt | 9 ++++++++- core/rend/metal/metal_context.h | 20 +++----------------- core/wsi/switcher.mm | 3 +++ 3 files changed, 14 insertions(+), 18 deletions(-) create mode 100644 core/wsi/switcher.mm diff --git a/CMakeLists.txt b/CMakeLists.txt index 954126f640..aaa7198678 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1342,8 +1342,15 @@ endif() target_sources(${PROJECT_NAME} PRIVATE core/wsi/context.h core/wsi/libretro.cpp - core/wsi/libretro.h + core/wsi/libretro.h) + +if(USE_METAL) + target_sources(${PROJECT_NAME} PRIVATE + core/wsi/switcher.mm) +else() + target_sources(${PROJECT_NAME} PRIVATE core/wsi/switcher.cpp) +endif() if(USE_OPENGL) target_compile_definitions(${PROJECT_NAME} PRIVATE USE_OPENGL) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 126e8780a9..80d1087b54 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -18,10 +18,8 @@ Copyright 2024 flyinghead */ #pragma once -#ifdef __OBJC__ #include #include -#endif #include "wsi/context.h" @@ -33,19 +31,12 @@ class MetalContext : public GraphicsContext bool init(); void term() override; + void resize() override; + void Present(); 
-#ifdef __OBJC__ id GetDevice() const { return device; } CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } -// Hack to make sure everything lines up when in pure C++ -#else - void* GetDevice() const { return device; } - void* GetLayer() const { return layer; } - void* GetQueue() const { return queue; } -#endif - void resize() override; - void Present(); std::string getDriverName() override; @@ -63,14 +54,9 @@ class MetalContext : public GraphicsContext static MetalContext* Instance() { return contextInstance; } private: -#ifdef __OBJC__ id device = nil; id queue = nil; CAMetalLayer* layer; -#else - void* device = nullptr; - void* queue = nullptr; - void* layer = nullptr; -#endif + static MetalContext* contextInstance; }; \ No newline at end of file diff --git a/core/wsi/switcher.mm b/core/wsi/switcher.mm new file mode 100644 index 0000000000..fcce81f5ed --- /dev/null +++ b/core/wsi/switcher.mm @@ -0,0 +1,3 @@ +// When including Metal, switcher needs to be built +// with Obj-C++ to properly resolve types. 
+#include "switcher.cpp" \ No newline at end of file From 13aa73c6874dc3f68075f6493b432a0c63d8f077 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 22:23:51 -0400 Subject: [PATCH 27/48] Update dither Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.mm | 12 +++++------- core/rend/metal/metal_shaders.h | 2 +- core/rend/metal/metal_shaders.mm | 16 +++++++--------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index e68c55f172..fb94dd85e9 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -458,21 +458,19 @@ { case 0: // 0555 KRGB 16 bit case 3: // 1555 ARGB 16 bit - fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = 31.f; - fragUniforms.ditherColorMax[3] = 255.f; + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f; break; case 1: // 565 RGB 16 bit - fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[2] = 31.f; - fragUniforms.ditherColorMax[1] = 63.f; - fragUniforms.ditherColorMax[3] = 255.f; + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f; + fragUniforms.ditherDivisor[1] = 4.f; break; case 2: // 4444 ARGB 16 bit - fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] - = fragUniforms.ditherColorMax[2] = fragUniforms.ditherColorMax[3] = 15.f; + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f; break; default: break; } + fragUniforms.ditherDivisor[3] = 1.f; } currentScissor = MTLScissorRect {}; diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h index 2fa46f0d3b..38d38873c8 100644 --- a/core/rend/metal/metal_shaders.h +++ b/core/rend/metal/metal_shaders.h @@ -82,7 +82,7 @@ struct FragmentShaderUniforms float colorClampMax[4]; float sp_FOG_COL_RAM[4]; // Only using 3 elements but easier for std140 
float sp_FOG_COL_VERT[4]; // same comment - float ditherColorMax[4]; + float ditherDivisor[4]; float cp_AlphaTestValue; float sp_FOG_DENSITY; }; diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index b6bf66f816..9b786cf2e1 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -130,7 +130,7 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms float4 color_clamp_max; float4 sp_fog_col_ram; float4 sp_fog_col_vert; - float4 dither_color_max; + float4 dither_divisor; float cp_alpha_test_value; float sp_fog_density; }; @@ -344,17 +344,15 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], constant FragmentShaderU if (dithering) { constexpr float dither_table[16] = { - 0.9375, 0.1875, 0.75, 0.0, - 0.4375, 0.6875, 0.25, 0.5, - 0.8125, 0.0625, 0.875, 0.125, - 0.3125, 0.5625, 0.375, 0.625 + 5, 13, 7, 15, + 9, 1, 11, 3, + 6, 14, 4, 12, + 10, 2, 8, 0 }; float r = dither_table[int(fmod(in.position.y, 4.0)) * 4 + int(fmod(in.position.x, 4.0))]; - // 31 for 5-bit color, 63 for 6-bits, 15 for 4 bits - color += r / uniforms.dither_color_max; - // Avoid rounding - color = floor(color * 255.0) / 255.0; + float4 dv = float4(r, r, r, 1.0) / uniforms.dither_divisor; + color = clamp(floor(color * 255 + dv) / 255, 0, 1); } return FragmentOut { color, depth }; From d7f16225695e0c1db99401db323b28d679795ded Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 28 May 2025 22:49:07 -0400 Subject: [PATCH 28/48] Force Metal off on not Apple Signed-off-by: Isaac Marovitz --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index aaa7198678..a4a12779a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,10 @@ if(IOS AND NOT LIBRETRO) set(USE_VULKAN OFF CACHE BOOL "Force vulkan off" FORCE) endif() +if(NOT APPLE) + set(USE_METAL OFF CACHE BOOL "Force metal off" FORCE) +endif() + include(GNUInstallDirs) include(CMakeRC) From 
61475244e6ac36e9d07d278caf59f05590eb6c61 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Thu, 29 May 2025 14:52:37 -0400 Subject: [PATCH 29/48] Fix Palette Texture Format Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_renderer.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index fb94dd85e9..b5126d8fae 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -126,7 +126,7 @@ if (!paletteTexture) { paletteTexture = std::make_unique(); - paletteTexture->tex_type = TextureType::_8; + paletteTexture->tex_type = TextureType::_8888; } else if (!updatePalette) return; From 291be28439e9ec2b8b87b7452ac9f8a82109ce36 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Thu, 29 May 2025 14:57:22 -0400 Subject: [PATCH 30/48] Texture Rework + Mipmaps Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_driver.h | 6 +- core/rend/metal/metal_renderer.h | 1 + core/rend/metal/metal_renderer.mm | 35 +++++-- core/rend/metal/metal_texture.h | 17 +++- core/rend/metal/metal_texture.mm | 156 ++++++++++++++++++++++++------ 5 files changed, 175 insertions(+), 40 deletions(-) diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index 33c236aee5..c810eb8c84 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -77,7 +77,7 @@ class MetalDriver final : public ImGuiDriver { ImTextureID getTexture(const std::string &name) override { auto it = textures.find(name); if (it != textures.end()) - return (ImTextureID)(intptr_t)(__bridge void*)it->second.texture->texture; + return (ImTextureID)(intptr_t)(__bridge void*)it->second.texture->GetTexture(); return ImTextureID{}; } @@ -87,7 +87,7 @@ class MetalDriver final : public ImGuiDriver { texture.texture->tex_type = TextureType::_8888; texture.texture->UploadToGPU(width, height, data, false); - auto textureID = (ImTextureID)(intptr_t)(__bridge 
void*)texture.texture->texture; + auto textureID = (ImTextureID)(intptr_t)(__bridge void*)texture.texture->GetTexture(); textures[name] = std::move(texture); @@ -96,7 +96,7 @@ class MetalDriver final : public ImGuiDriver { void deleteTexture(const std::string &name) override { auto it = textures.find(name); - [it->second.texture->texture setPurgeableState:MTLPurgeableStateEmpty]; + [it->second.texture->GetTexture() setPurgeableState:MTLPurgeableStateEmpty]; textures.erase(name); } diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index 229588783b..b9151679cf 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -133,6 +133,7 @@ class MetalRenderer final : public Renderer id curMainBuffer = nil; id commandBuffer = nil; + id texCommandBuffer = nil; MetalBufferData* mainBuffer; MetalPipelineManager pipelineManager = MetalPipelineManager(this); MetalShaders shaders; diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index b5126d8fae..a220c674b5 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -54,12 +54,20 @@ if (!config::EmulateFramebuffer) clearLastFrame = false; } + if (resetTextureCache) { + textureCache.Clear(); + resetTextureCache = false; + } + + texCommandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; ta_parse(ctx, true); // TODO can't update fog or palette twice in multi render CheckFogTexture(); CheckPaletteTexture(); + [texCommandBuffer commit]; + texCommandBuffer = nil; } bool MetalRenderer::Render() { @@ -95,13 +103,18 @@ MetalTexture* tf = textureCache.getTextureCacheData(tsp, tcw); if (tf->NeedsUpdate()) { + tf->SetCommandBuffer(texCommandBuffer); + if (!tf->Update()) { - tf= nullptr; + tf = nullptr; + return nullptr; } } else if (tf->IsCustomTextureAvailable()) { // TODO + tf->SetCommandBuffer(texCommandBuffer); } + tf->SetCommandBuffer(nil); return tf; } @@ -119,7 +132,9 @@ u8 texData[256]; 
MakeFogTexture(texData); + fogTexture->SetCommandBuffer(texCommandBuffer); fogTexture->UploadToGPU(128, 2, texData, false); + fogTexture->SetCommandBuffer(nil); } void MetalRenderer::CheckPaletteTexture() { @@ -132,7 +147,9 @@ return; updatePalette = false; + paletteTexture->SetCommandBuffer(texCommandBuffer); paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false); + paletteTexture->SetCommandBuffer(nil); } void MetalRenderer::WaitIdle() { @@ -233,9 +250,11 @@ palette_index = float(poly.tcw.PalSelect >> 4 << 8) / 1023.0f; } + std::array pushConstants; + if (tileClip == TileClipping::Inside || trilinearAlpha != 1.0f || gpuPalette != 0) { - const std::array pushConstants = { + pushConstants = { (float)scissorRect.x, (float)scissorRect.y, (float)scissorRect.x + (float)scissorRect.width, @@ -243,10 +262,12 @@ trilinearAlpha, palette_index }; - - [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + } else { + pushConstants = { 0, 0, 0, 0, 0, 0 }; } + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; [encoder setRenderPipelineState:pipelineManager.GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; @@ -261,7 +282,7 @@ } if (poly.texture != nullptr) { - auto texture = ((MetalTexture *)poly.texture)->texture; + auto texture = ((MetalTexture *)poly.texture)->GetTexture(); [encoder setFragmentTexture:texture atIndex:0]; // Texture sampler @@ -532,8 +553,8 @@ @autoreleasepool { id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:descriptor]; - [renderEncoder setFragmentTexture:fogTexture->texture atIndex:2]; - [renderEncoder setFragmentTexture:paletteTexture->texture atIndex:3]; + [renderEncoder setFragmentTexture:fogTexture->GetTexture() atIndex:2]; + [renderEncoder 
setFragmentTexture:paletteTexture->GetTexture() atIndex:3]; // Fog sampler TSP fogTsp = {}; diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index c5dd36b57b..0a4bea4615 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -26,11 +26,24 @@ class MetalTexture final : public BaseTextureCacheData { public: MetalTexture(TSP tsp = {}, TCW tcw = {}) : BaseTextureCacheData(tsp, tcw) {} - id texture; std::string GetId() override { return std::to_string([texture gpuResourceID]._impl); } - void UploadToGPU(int width, int height, const u8 *temp_tex_buffer, bool mipmapped, bool mipmapsIncluded = false) override; + id GetTexture() const { return texture; } + void UploadToGPU(int width, int height, const u8 *data, bool mipmapped, bool mipmapsIncluded = false) override; + void SetCommandBuffer(id commandBuffer) { this->commandBuffer = commandBuffer; } bool Delete() override; + +private: + void Init(u32 width, u32 height, MTLPixelFormat format, u32 dataSize, bool mipmapped, bool mipmapsIncluded); + void SetImage(u32 srcSize, const void *srcData, bool genMipmaps); + void GenerateMipmaps(); + + MTLPixelFormat format = MTLPixelFormatInvalid; + u32 width = 0; + u32 height = 0; + u32 mipmapLevels = 1; + id commandBuffer; + id texture; }; class MetalSamplers diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index 2c2912dc75..992f67e591 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -19,44 +19,58 @@ #include "metal_texture.h" -void MetalTexture::UploadToGPU(int width, int height, const u8 *temp_tex_buffer, bool mipmapped, bool mipmapsIncluded) +void MetalTexture::UploadToGPU(int width, int height, const u8 *data, bool mipmapped, bool mipmapsIncluded) { MTLPixelFormat format = MTLPixelFormatInvalid; - - u32 bpp = 2; + u32 dataSize = width * height * 2; switch (tex_type) { - case TextureType::_5551: - format = MTLPixelFormatA1BGR5Unorm; - break; - case 
TextureType::_565: - format = MTLPixelFormatB5G6R5Unorm; - break; - case TextureType::_4444: - format = MTLPixelFormatABGR4Unorm; - break; - case TextureType::_8888: - bpp = 4; - format = MTLPixelFormatRGBA8Unorm; - break; - case TextureType::_8: - bpp = 1; - format = MTLPixelFormatR8Unorm; - break; + case TextureType::_5551: + format = MTLPixelFormatA1BGR5Unorm; + break; + case TextureType::_565: + format = MTLPixelFormatB5G6R5Unorm; + break; + case TextureType::_4444: + format = MTLPixelFormatABGR4Unorm; + break; + case TextureType::_8888: + format = MTLPixelFormatRGBA8Unorm; + dataSize *= 2; + break; + case TextureType::_8: + format = MTLPixelFormatR8Unorm; + dataSize /= 2; + break; } - - int mipmapLevels = 1; if (mipmapsIncluded) { - mipmapLevels = 0; - int dim = width; - while (dim != 0) + int w = width / 2; + u32 size = dataSize / 4; + while (w) { - mipmapLevels++; - dim >>= 1; + dataSize += ((size + 3) >> 2) << 2; // offset must be a multiple of 4 + size /= 4; + w /= 2; } } + if (width != this->width || height != this->height + || format != this->format || this->texture == nil) + Init(width, height, format, dataSize, mipmapped, mipmapsIncluded); + + SetImage(dataSize, data, mipmapped && !mipmapsIncluded); +} + +void MetalTexture::Init(u32 width, u32 height, MTLPixelFormat format, u32 dataSize, bool mipmapped, bool mipmapsIncluded) +{ + this->width = width; + this->height = height; + this->format = format; + mipmapLevels = 1; + if (mipmapped) + mipmapLevels += floor(log2(std::max(width, height))); + MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; [desc setWidth:width]; @@ -69,8 +83,94 @@ auto device = MetalContext::Instance()->GetDevice(); texture = [device newTextureWithDescriptor:desc]; +} + +void MetalTexture::SetImage(u32 srcSize, const void *srcData, bool genMipmaps) { + u32 bpp; + switch (tex_type) { + case TextureType::_8888: + bpp = 4; + break; + case TextureType::_8: + bpp = 1; + break; + default: + bpp = 2; + break; + } + + if 
(mipmapLevels > 1 && !genMipmaps && tex_type != TextureType::_8888) + { + u8 *src = (u8 *)srcData; + u32 dataOffset = 0; + + for (u32 i = 0; i < mipmapLevels; i++) { + const u32 size = (1 << (2 * i)) * bpp; + + u32 mipLevel = mipmapLevels - i - 1; + u32 mipWidth = std::max(texture.width >> mipLevel, 1ul); + u32 mipHeight = std::max(texture.height >> mipLevel, 1ul); + + MTLRegion region = MTLRegionMake2D(0, 0, mipWidth, mipHeight); + [texture replaceRegion:region + mipmapLevel:mipLevel + withBytes:src + dataOffset + bytesPerRow:mipWidth * bpp]; + + dataOffset += ((size + 3) >> 2) << 2; + } + } + else + { + u32 rowBytes = texture.width * bpp; + + MTLRegion region = MTLRegionMake2D(0, 0, texture.width, texture.height); + [texture replaceRegion:region + mipmapLevel:0 + withBytes:srcData + bytesPerRow:rowBytes]; + + if (mipmapLevels > 1 && genMipmaps) { + GenerateMipmaps(); + } + } +} + +void MetalTexture::GenerateMipmaps() +{ + verify((bool)commandBuffer); + [commandBuffer setLabel:@"Mipmap Generation"]; + + id blitEncoder = [commandBuffer blitCommandEncoder]; + + u32 mipWidth = this->width; + u32 mipHeight = this->height; + + for (u32 i = 1; i < mipmapLevels; i++) { + u32 srcWidth = mipWidth; + u32 srcHeight = mipHeight; + + mipWidth = std::max(mipWidth / 2, 1u); + mipHeight = std::max(mipHeight / 2, 1u); + + MTLOrigin srcOrigin = MTLOriginMake(0, 0, 0); + MTLSize srcSize = MTLSizeMake(srcWidth, srcHeight, 1); + + MTLOrigin dstOrigin = MTLOriginMake(0, 0, 0); + MTLSize dstSize = MTLSizeMake(mipWidth, mipHeight, 1); + + [blitEncoder copyFromTexture:texture + sourceSlice:0 + sourceLevel:i - 1 + sourceOrigin:srcOrigin + sourceSize:srcSize + toTexture:texture + destinationSlice:0 + destinationLevel:i + destinationOrigin:dstOrigin]; + } - [texture replaceRegion:MTLRegionMake2D(0, 0, width, height) mipmapLevel:0 withBytes:temp_tex_buffer bytesPerRow:bpp * width]; + [blitEncoder endEncoding]; } bool MetalTexture::Delete() From aa1fd75f1280cffd9693b6f66e92c7a3af40a707 Mon Sep 
17 00:00:00 2001 From: Isaac Marovitz Date: Thu, 29 May 2025 17:25:12 -0400 Subject: [PATCH 31/48] Update copyright Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_buffer.h | 3 ++- core/rend/metal/metal_buffer.mm | 2 +- core/rend/metal/metal_context.h | 3 ++- core/rend/metal/metal_context.mm | 2 +- core/rend/metal/metal_driver.h | 3 ++- core/rend/metal/metal_pipeline.h | 3 ++- core/rend/metal/metal_pipeline.mm | 3 ++- core/rend/metal/metal_renderer.h | 3 ++- core/rend/metal/metal_renderer.mm | 2 +- core/rend/metal/metal_shaders.h | 3 ++- core/rend/metal/metal_shaders.mm | 2 +- core/rend/metal/metal_texture.h | 3 ++- core/rend/metal/metal_texture.mm | 2 +- 13 files changed, 21 insertions(+), 13 deletions(-) diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h index 2426136b15..3e0a864bd9 100644 --- a/core/rend/metal/metal_buffer.h +++ b/core/rend/metal/metal_buffer.h @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include "types.h" #include diff --git a/core/rend/metal/metal_buffer.mm b/core/rend/metal/metal_buffer.mm index 31ea2ec9f6..3f207f632c 100644 --- a/core/rend/metal/metal_buffer.mm +++ b/core/rend/metal/metal_buffer.mm @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 80d1087b54..000e2cc7f4 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -1,5 +1,5 @@ /* -Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ Copyright 2024 flyinghead You should have received a copy of the GNU General Public License along with Flycast. If not, see . 
*/ + #pragma once #include diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 96c81ff47e..f5015bb9cd 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -1,5 +1,5 @@ /* -Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index c810eb8c84..3f8ee472b2 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include "ui/imgui_driver.h" #include "imgui_impl_metal.h" diff --git a/core/rend/metal/metal_pipeline.h b/core/rend/metal/metal_pipeline.h index f52525c12a..d460e1b34e 100644 --- a/core/rend/metal/metal_pipeline.h +++ b/core/rend/metal/metal_pipeline.h @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include "types.h" #include diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index ea6e0c1adb..a8d41826c5 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . 
*/ + #include "metal_pipeline.h" #include "metal_context.h" diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index b9151679cf..405a3a61e3 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include "metal_pipeline.h" #include "metal_shaders.h" diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index a220c674b5..b449ece0c2 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h index 38d38873c8..132ca70507 100644 --- a/core/rend/metal/metal_shaders.h +++ b/core/rend/metal/metal_shaders.h @@ -1,5 +1,5 @@ /* -Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. @@ -16,6 +16,7 @@ Copyright 2024 flyinghead You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 9b786cf2e1..28f4b11ee5 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -1,5 +1,5 @@ /* -Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index 0a4bea4615..38e19b8416 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. 
@@ -16,6 +16,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see . */ + #pragma once #include "rend/TexCache.h" #include "metal_context.h" diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index 992f67e591..a5a1a49fea 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -1,5 +1,5 @@ /* - Copyright 2024 flyinghead + Copyright 2025 flyinghead This file is part of Flycast. From e2e0a7a5962a61f7370baf540ac11725094ce00f Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Fri, 30 May 2025 00:02:34 -0400 Subject: [PATCH 32/48] Major Refactor --- CMakeLists.txt | 4 + core/rend/metal/metal_context.h | 56 ++- core/rend/metal/metal_context.mm | 261 +++++++++- core/rend/metal/metal_drawer.h | 207 ++++++++ core/rend/metal/metal_drawer.mm | 682 +++++++++++++++++++++++++++ core/rend/metal/metal_driver.h | 43 +- core/rend/metal/metal_pipeline.h | 7 +- core/rend/metal/metal_pipeline.mm | 29 +- core/rend/metal/metal_quad.h | 115 +++++ core/rend/metal/metal_quad.mm | 119 +++++ core/rend/metal/metal_renderer.h | 126 ++--- core/rend/metal/metal_renderer.mm | 666 ++++++-------------------- core/rend/metal/metal_shaders.h | 63 ++- core/rend/metal/metal_shaders.mm | 107 ++++- core/rend/metal/metal_texture.h | 9 +- core/rend/vulkan/vulkan_renderer.cpp | 1 + core/sdl/sdl.cpp | 2 +- 17 files changed, 1828 insertions(+), 669 deletions(-) create mode 100644 core/rend/metal/metal_drawer.h create mode 100644 core/rend/metal/metal_drawer.mm create mode 100644 core/rend/metal/metal_quad.h create mode 100644 core/rend/metal/metal_quad.mm diff --git a/CMakeLists.txt b/CMakeLists.txt index a4a12779a6..615a2e62ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1544,6 +1544,10 @@ if(APPLE AND USE_METAL) core/rend/metal/metal_pipeline.mm core/rend/metal/metal_buffer.h core/rend/metal/metal_buffer.mm + core/rend/metal/metal_drawer.h + core/rend/metal/metal_drawer.mm + 
core/rend/metal/metal_quad.h + core/rend/metal/metal_quad.mm core/rend/metal/metal_driver.h core/deps/imgui/backends/imgui_impl_metal.h core/deps/imgui/backends/imgui_impl_metal.mm) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 000e2cc7f4..2acecc10c0 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -22,7 +22,9 @@ #include #include +#include "rend/transform_matrix.h" #include "wsi/context.h" +#include "metal_quad.h" class MetalContext : public GraphicsContext { @@ -32,12 +34,30 @@ class MetalContext : public GraphicsContext bool init(); void term() override; - void resize() override; + + void resize() override { resized = true; } + bool IsValid() { return width != 0 && height != 0; } + void NewFrame(); + void BeginRenderPass(); + void EndFrame(); void Present(); + void PresentFrame(id texture, MTLViewport viewport, float aspectRatio); + void PresentLastFrame(); + bool GetLastFrame(std::vector& data, int& width, int& height); id GetDevice() const { return device; } CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } + MTLRenderPassDescriptor* GetDescriptor() const { + if (rendering) { + return renderPassDescriptor; + } + + // Hack to make sure imgui takes control of presenting pause menu + return nullptr; + } + id GetEncoder() const { return commandEncoder; } + id GetCommandBuffer() const { return commandBuffers[currentImage]; } std::string getDriverName() override; @@ -52,9 +72,43 @@ class MetalContext : public GraphicsContext bool hasPerPixel() override { return true; } + bool recreateSwapChainIfNeeded(); static MetalContext* Instance() { return contextInstance; } private: + void CreateSwapChain(); + void DrawFrame(id texture, MTLViewport viewport, float aspectRatio); + + bool HasSurfaceDimensionChanged() const; + void SetWindowSize(u32 width, u32 height); + + bool rendering = false; + bool renderDone = false; + u32 width = 0; + u32 height = 0; + bool 
resized = false; + bool swapOnVSync = true; + int swapInterval = 1; + + u32 currentImage = 0; + + id currentDrawable = nil; + MTLRenderPassDescriptor *renderPassDescriptor = nil; + + std::vector> commandBuffers; + id commandEncoder; + + std::unique_ptr quadPipelineWithAlpha; + std::unique_ptr quadPipeline; + std::unique_ptr quadRotatePipeline; + std::unique_ptr quadDrawer; + std::unique_ptr quadRotateDrawer; + std::unique_ptr shaderManager; + + id lastFrameTexture = nil; + MTLViewport lastFrameViewport; + float lastFrameAR = 0.f; + id device = nil; id queue = nil; CAMetalLayer* layer; diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index f5015bb9cd..13e7d0abce 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -23,10 +23,48 @@ #include "sdl/sdl.h" #endif #include "ui/imgui_driver.h" +#import "metal_buffer.h" MetalContext *MetalContext::contextInstance; -bool MetalContext::init() { +void MetalContext::CreateSwapChain() +{ + // WAIT IDLE + + commandBuffers.clear(); + + [layer setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [layer setFramebufferOnly:TRUE]; + [layer setDisplaySyncEnabled:TRUE]; + [layer setColorspace:CGColorSpaceCreateWithName(kCGColorSpaceSRGB)]; + [layer setMaximumDrawableCount:3]; + + auto size = [layer drawableSize]; + width = size.width; + height = size.height; + SetWindowSize(width, height); + resized = false; + + if (swapOnVSync && config::DupeFrames && settings.display.refreshRate > 60.f) + swapInterval = settings.display.refreshRate / 60.f; + else + swapInterval = 1; + + commandBuffers.resize(3); + + quadPipeline->Init(shaderManager.get()); + quadPipelineWithAlpha->Init(shaderManager.get()); + quadDrawer->Init(quadPipeline.get()); + quadRotatePipeline->Init(shaderManager.get()); + quadRotateDrawer->Init(quadRotatePipeline.get()); + + currentImage = 2; + + ERROR_LOG(RENDERER, "Metal swap chain created: %d x %d, swap chain size %d", width, height, 3); +} + +bool 
MetalContext::init() +{ GraphicsContext::instance = this; #ifdef USE_SDL @@ -55,22 +93,185 @@ [layer setDevice:device]; queue = [device newCommandQueue]; + shaderManager = std::make_unique(); + quadPipeline = std::make_unique(true, false); + quadPipelineWithAlpha = std::make_unique(false, false); + quadDrawer = std::make_unique(); + quadRotatePipeline = std::make_unique(true, true); + quadRotateDrawer = std::make_unique(); + NOTICE_LOG(RENDERER, "Created Metal view."); imguiDriver = std::unique_ptr(new MetalDriver()); - return true; -} -void MetalContext::resize() { + CreateSwapChain(); + return true; } std::string MetalContext::getDriverName() { return [[device name] UTF8String]; } -void MetalContext::Present() { +bool MetalContext::recreateSwapChainIfNeeded() +{ + if (resized || HasSurfaceDimensionChanged()) + { + CreateSwapChain(); + lastFrameTexture = nil; + return true; + } + else + return false; +} + +void MetalContext::BeginRenderPass() { + recreateSwapChainIfNeeded(); + if (!IsValid()) + return; + + currentDrawable = [layer nextDrawable]; + + if (!renderPassDescriptor) { + renderPassDescriptor = [[MTLRenderPassDescriptor alloc] init]; + } + + auto colorAttachment = renderPassDescriptor.colorAttachments[0]; + [colorAttachment setTexture:currentDrawable.texture]; + [colorAttachment setLoadAction:MTLLoadActionClear]; + [colorAttachment setStoreAction:MTLStoreActionStore]; + [colorAttachment setClearColor:MTLClearColorMake(VO_BORDER_COL.red(), VO_BORDER_COL.green(), VO_BORDER_COL.blue(), 1.0f)]; + + if (currentImage >= commandBuffers.size()) { + commandBuffers.resize(currentImage + 1); + } + + if (!commandBuffers[currentImage]) { + commandBuffers[currentImage] = [queue commandBuffer]; + [commandBuffers[currentImage] setLabel:@"Render Frame"]; + } + + commandEncoder = [commandBuffers[currentImage] renderCommandEncoderWithDescriptor: renderPassDescriptor]; + [commandBuffers[currentImage] presentDrawable:currentDrawable]; +}; + +void MetalContext::NewFrame() { + 
if (!IsValid()) + return; + + currentImage = (currentImage + 1) % 3; + currentDrawable = nil; + verify(!rendering); + rendering = true; +} + +void MetalContext::EndFrame() { + if (!IsValid()) + return; + + [commandEncoder endEncoding]; + [commandBuffers[currentImage] commit]; + [commandBuffers[currentImage] waitUntilCompleted]; + commandBuffers[currentImage] = nil; + + verify(rendering); + rendering = false; + renderDone = true; +} + +void MetalContext::Present() +{ + if (renderDone) + { + if (lastFrameTexture != nil && IsValid() && !gui_is_open()) + for (int i = 1; i < swapInterval; i++) + { + PresentFrame(lastFrameTexture, lastFrameViewport, lastFrameAR); + } + renderDone = false; + } + if (swapOnVSync == (settings.input.fastForwardMode || !config::VSync)) + { + swapOnVSync = (!settings.input.fastForwardMode && config::VSync); + resized = true; + } + if (resized) + CreateSwapChain(); + lastFrameTexture = nil; +} + +void MetalContext::DrawFrame(id texture, MTLViewport viewport, float aspectRatio) { + MetalQuadVertex vtx[4] { + { -1, -1, 0, 0, 1 }, + { 1, -1, 0, 1, 1 }, + { -1, 1, 0, 0, 0 }, + { 1, 1, 0, 1, 0 }, + }; + float shiftX, shiftY; + getVideoShift(shiftX, shiftY); + vtx[0].x = vtx[2].x = -1.f + shiftX * 2.f / viewport.width; + vtx[1].x = vtx[3].x = vtx[0].x + 2; + vtx[0].y = vtx[1].y = -1.f + shiftY * 2.f / viewport.height; + vtx[2].y = vtx[3].y = vtx[0].y + 2; + + [commandEncoder pushDebugGroup:@"DrawFrame"]; + + if (config::Rotate90) + quadRotatePipeline->BindPipeline(commandEncoder); + else + quadPipeline->BindPipeline(commandEncoder); + + float screenAR = (float)width / height; + float dx = 0; + float dy = 0; + if (aspectRatio > screenAR) + dy = height * (1 - screenAR / aspectRatio) / 2; + else + dx = width * (1 - aspectRatio / screenAR) / 2; + + MTLViewport framePort = { dx, dy, width - dx * 2, height - dy * 2, 0, 1 }; + [commandEncoder setViewport:framePort]; + [commandEncoder setScissorRect:MTLScissorRect { (uint)dx, (uint)dy, (uint)(width - dx * 
2), (uint)(height - dy * 2) }]; + if (config::Rotate90) + quadRotateDrawer->Draw(commandEncoder, texture, vtx, config::TextureFiltering == 1); + else + quadDrawer->Draw(commandEncoder, texture, vtx, config::TextureFiltering == 1); + + [commandEncoder popDebugGroup]; +} + +void MetalContext::PresentFrame(id texture, MTLViewport viewport, float aspectRatio) +{ + lastFrameTexture = texture; + lastFrameViewport = viewport; + lastFrameAR = aspectRatio; + + if (texture != nil && IsValid()) + { + NewFrame(); + + BeginRenderPass(); + + gui_draw_osd(); + if (lastFrameTexture != nil) // Might have been nullified if swap chain recreated + DrawFrame(texture, viewport, aspectRatio); + + imguiDriver->renderDrawData(ImGui::GetDrawData(), false); + EndFrame(); + } + else { + if (!IsValid()) + { + ERROR_LOG(RENDERER, "NOT PRESENTING INVALID SIZE!"); + } + } +} + +void MetalContext::PresentLastFrame() +{ + if (lastFrameTexture != nil && IsValid()) + DrawFrame(lastFrameTexture, lastFrameViewport, lastFrameAR); } void MetalContext::term() { @@ -78,6 +279,29 @@ imguiDriver.reset(); } +bool MetalContext::HasSurfaceDimensionChanged() const +{ + auto size = [layer drawableSize]; + return width != size.width || height != size.height; +} + +void MetalContext::SetWindowSize(u32 width, u32 height) +{ + if (this->width != width && this->height != height) + { + this->width = width; + this->height = height; + + if (width != 0) + settings.display.width = width; + + if (height != 0) + settings.display.height = height; + + resize(); + } +} + MetalContext::MetalContext() { verify(contextInstance == nullptr); contextInstance = this; @@ -88,3 +312,30 @@ contextInstance = nullptr; } +bool MetalContext::GetLastFrame(std::vector &data, int &width, int &height) +{ + if (lastFrameTexture == nil) + return false; + + if (width != 0) { + height = width / lastFrameAR; + } + else if (height != 0) { + width = lastFrameAR * height; + } + else + { + width = lastFrameViewport.width; + height = 
lastFrameViewport.height; + if (config::Rotate90) + std::swap(width, height); + // We need square pixels for PNG + int w = lastFrameAR * height; + if (width > w) + height = width / lastFrameAR; + else + width = w; + } + + return true; +} diff --git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h new file mode 100644 index 0000000000..8dac657946 --- /dev/null +++ b/core/rend/metal/metal_drawer.h @@ -0,0 +1,207 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once + +#include "metal_texture.h" +#include "metal_shaders.h" +#include "metal_pipeline.h" +#include "metal_buffer.h" +#include "rend/tileclip.h" +#include "rend/transform_matrix.h" +#include "rend/sorter.h" +#include "hw/pvr/pvr_mem.h" + +class MetalBaseDrawer +{ +protected: + TileClipping SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect); + void SetBaseScissor(MTLViewport viewport); + + void SetScissor(id encoder, const MTLScissorRect& scissor) + { + if (scissor.x != currentScissor.x || + scissor.y != currentScissor.y || + scissor.width != currentScissor.width || + scissor.height != currentScissor.height) + { + [encoder setScissorRect:scissor]; + currentScissor = scissor; + } + } + + MetalBufferData* GetMainBuffer(u32 size) + { + MetalBufferData *buffer; + if (!mainBuffers.empty()) + { + buffer = mainBuffers.back().release(); + mainBuffers.pop_back(); + if (buffer->bufferSize < size) { + u32 newSize = (u32)buffer->bufferSize; + while (newSize < size) + newSize *= 2; + + INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", buffer->bufferSize, newSize); + [buffer->buffer setPurgeableState: MTLPurgeableStateEmpty]; + delete buffer; /* ownership was taken via release() above; free the undersized buffer before replacing it */ + buffer = new MetalBufferData(newSize); + } + } + else + { + buffer = new MetalBufferData(std::max(512 * 1024u, size)); + } + + return buffer; + } + + template + T MakeFragmentUniforms() + { + T fragUniforms; + + //VERT and RAM fog color constants + FOG_COL_VERT.getRGBColor(fragUniforms.sp_FOG_COL_VERT); + FOG_COL_RAM.getRGBColor(fragUniforms.sp_FOG_COL_RAM); + + //Fog density constant + fragUniforms.sp_FOG_DENSITY = FOG_DENSITY.get() * config::ExtraDepthScale; + + pvrrc.fog_clamp_min.getRGBAColor(fragUniforms.colorClampMin); + pvrrc.fog_clamp_max.getRGBAColor(fragUniforms.colorClampMax); + + fragUniforms.cp_AlphaTestValue = (PT_ALPHA_REF & 0xFF) / 255.0f; + + return fragUniforms; + } + + MTLScissorRect baseScissor {}; + MTLScissorRect currentScissor {}; + TransformMatrix matrices; + std::vector> mainBuffers; +}; + 
+class MetalDrawer : public MetalBaseDrawer +{ +public: + virtual ~MetalDrawer() = default; + + bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture, id commandBuffer); + virtual void EndRenderPass() { + renderPassStarted = false; + } + + virtual void Term() { + + } + +protected: + virtual id BeginRenderPass(id commandBuffer) = 0; + void Init(MetalSamplers *samplers, MetalPipelineManager pipelineManager) { + this->samplers = samplers; + this->pipelineManager = std::make_unique(pipelineManager); + } + + int GetCurrentImage() const { return imageIndex; } + + id currentEncoder = nil; + MetalSamplers *samplers = nullptr; + bool renderPassStarted = false; + +private: + void DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count); + void DrawSorted(id encoder, const std::vector& polys, u32 first, u32 last, bool multipass); + void DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector& polys, u32 first, u32 last); + void DrawModVols(id encoder, int first, int count); + void UploadMainBuffer(const MetalVertexShaderUniforms& vertexUniforms, const MetalFragmentShaderUniforms& fragmentUniforms); + + int imageIndex = 0; + struct { + u64 indexOffset = 0; + u64 modVolOffset = 0; + u64 vertexUniformOffset = 0; + u64 fragmentUniformOffset = 0; + u64 naomi2OpaqueOffset = 0; + u64 naomi2PunchThroughOffset = 0; + u64 naomi2TranslucentOffset = 0; + u64 naomi2ModVolOffset = 0; + u64 naomi2TrModVolOffset = 0; + u64 lightsOffset = 0; + } offsets; + id curMainBuffer = nil; + std::unique_ptr pipelineManager = nullptr; + bool dithering = false; +}; + +class MetalScreenDrawer : public MetalDrawer +{ +public: + void Init(MetalSamplers *samplers, MetalShaders *shaders, const MTLViewport& viewport); + + void EndRenderPass() override; + bool PresentFrame() + { + EndRenderPass(); + if (!frameRendered) + return false; + frameRendered = false; + 
MetalContext::Instance()->PresentFrame(framebuffers[GetCurrentImage()], viewport, aspectRatio); + + return true; + } + +protected: + id BeginRenderPass(id commandBuffer) override; + +private: + std::vector> framebuffers; + std::vector loadPassDescriptors; + std::vector clearPassDescriptors; + id depthAttachment; + MTLViewport viewport; + MetalShaders *shaderManager = nullptr; + std::vector clearNeeded; + bool frameRendered = false; + float aspectRatio = 0.f; + bool emulateFramebuffer = false; +}; + +class MetalTextureDrawer : public MetalDrawer +{ +public: + void Init(MetalSamplers *samplers, MetalShaders *shaders, MetalTextureCache *textureCache); + + void EndRenderPass() override; + +protected: + id BeginRenderPass(id commandBuffer) override; + +private: + u32 width = 0; + u32 height = 0; + u32 textureAddr = 0; + + MetalTexture *texture = nullptr; + std::vector> framebuffers; + MTLRenderPassDescriptor *rttPassDescriptor = nil; + id colorAttachment; + id depthAttachment; + MetalTextureCache *textureCache = nullptr; +}; \ No newline at end of file diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm new file mode 100644 index 0000000000..ed4742831b --- /dev/null +++ b/core/rend/metal/metal_drawer.mm @@ -0,0 +1,682 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#include "metal_drawer.h" + +TileClipping MetalBaseDrawer::SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect) { + int rect[4] = {}; + TileClipping clipMode = GetTileClip(val, matrices.GetViewportMatrix(), rect); + if (clipMode != TileClipping::Off) + { + clipRect.x = rect[0]; + clipRect.y = rect[1]; + clipRect.width = rect[2]; + clipRect.height = rect[3]; + } + if (clipMode == TileClipping::Outside) + SetScissor(encoder, clipRect); + else + SetScissor(encoder, baseScissor); + + return clipMode; +} + +void MetalBaseDrawer::SetBaseScissor(MTLViewport viewport) { + bool wide_screen_on = config::Widescreen + && !matrices.IsClipped() && !config::Rotate90 && !config::EmulateFramebuffer; + if (!wide_screen_on) + { + float width; + float height; + float min_x; + float min_y; + glm::vec4 clip_min(pvrrc.fb_X_CLIP.min, pvrrc.fb_Y_CLIP.min, 0, 1); + glm::vec4 clip_dim(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1, + pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1, 0, 0); + clip_min = matrices.GetScissorMatrix() * clip_min; + clip_dim = matrices.GetScissorMatrix() * clip_dim; + + min_x = clip_min[0]; + min_y = clip_min[1]; + width = clip_dim[0]; + height = clip_dim[1]; + if (width < 0) + { + min_x += width; + width = -width; + } + if (height < 0) + { + min_y += height; + height = -height; + } + + baseScissor = MTLScissorRect(); + baseScissor.x = std::max(lroundf(min_x), 0L); + baseScissor.y = std::max(lroundf(min_y), 0L); + baseScissor.width = std::max(lroundf(width), 0L); + baseScissor.height = std::max(lroundf(height), 0L); + } + else + { + baseScissor = MTLScissorRect(); + baseScissor.x = 0; + baseScissor.y = 0; + baseScissor.width = viewport.width; + baseScissor.height = viewport.height; + } +} + +void MetalDrawer::DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam &poly, u32 first, u32 count) +{ + MTLScissorRect scissorRect {}; + TileClipping tileClip = SetTileClip(encoder, poly.tileclip, scissorRect); + + float trilinearAlpha = 1.0f; + if 
(poly.tsp.FilterMode > 1 && poly.pcw.Texture && listType != ListType_Punch_Through && poly.tcw.MipMapped == 1) + { + trilinearAlpha = 0.25f * (poly.tsp.MipMapD & 0x3); + if (poly.tsp.FilterMode == 2) + // Trilinear pass A + trilinearAlpha = 1.0f - trilinearAlpha; + } + int gpuPalette = poly.texture == nullptr || !poly.texture->gpuPalette ? 0 + : poly.tsp.FilterMode + 1; + float palette_index = 0.0f; + if (gpuPalette != 0) + { + if (config::TextureFiltering == 1) + gpuPalette = 1; + else if (config::TextureFiltering == 2) + gpuPalette = 2; + if (poly.tcw.PixelFmt == PixelPal4) + palette_index = float(poly.tcw.PalSelect << 4) / 1023.0f; + else + palette_index = float(poly.tcw.PalSelect >> 4 << 8) / 1023.0f; + } + + std::array pushConstants; + + if (tileClip == TileClipping::Inside || trilinearAlpha != 1.0f || gpuPalette != 0) + { + pushConstants = { + (float)scissorRect.x, + (float)scissorRect.y, + (float)scissorRect.x + (float)scissorRect.width, + (float)scissorRect.y + (float)scissorRect.height, + trilinearAlpha, + palette_index + }; + } else { + pushConstants = { 0, 0, 0, 0, 0, 0 }; + } + + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + + bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; + + [encoder setRenderPipelineState:pipelineManager->GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; + [encoder setDepthStencilState:pipelineManager->GetDepthStencilStates(listType, sortTriangles, shadowed, poly)]; + + if (shadowed) { + if (poly.pcw.Shadow != 0) { + [encoder setStencilReferenceValue:0x80]; + } else { + [encoder setStencilReferenceValue:0]; + } + } + + if (poly.texture != nullptr) { + auto texture = ((MetalTexture *)poly.texture)->GetTexture(); + [encoder setFragmentTexture:texture atIndex:0]; + + // Texture sampler + [encoder setFragmentSamplerState:samplers->GetSampler(poly, listType == ListType_Punch_Through) 
atIndex:0]; + } + + if (poly.pcw.Texture || poly.isNaomi2()) + { + u32 index = 0; + if (poly.isNaomi2()) + { + + } + + // TODO: Bind Texture & Naomi2 Lights Buffers + } + + MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; + + [encoder drawIndexedPrimitives:primitive + indexCount:count + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + first * sizeof(u32)]; +} + +void MetalDrawer::DrawSorted(id encoder, const std::vector &polys, u32 first, u32 last, bool multipass) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawSorted"]; + + for (u32 idx = first; idx < last; idx++) + DrawPoly(encoder, ListType_Translucent, true, pvrrc.global_param_tr[polys[idx].polyIndex], polys[idx].first, polys[idx].count); + if (multipass && config::TranslucentPolygonDepthMask) + { + // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) + for (u32 idx = first; idx < last; idx++) + { + const SortedTriangle& param = polys[idx]; + const PolyParam& polyParam = pvrrc.global_param_tr[param.polyIndex]; + if (polyParam.isp.ZWriteDis) + continue; + [encoder setRenderPipelineState:pipelineManager->GetDepthPassPipeline(polyParam.isp.CullMode, polyParam.isNaomi2())]; + [encoder setDepthStencilState:pipelineManager->GetDepthPassDepthStencilStates(polyParam.isp.CullMode, polyParam.isNaomi2())]; + MTLScissorRect scissorRect {}; + SetTileClip(encoder, polyParam.tileclip, scissorRect); + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:param.count + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32)]; + } + } + + [encoder popDebugGroup]; +} + +void MetalDrawer::DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector &polys, u32 first, u32 last) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawList"]; + + 
const PolyParam *pp_end = polys.data() + last; + for (const PolyParam *pp = &polys[first]; pp != pp_end; pp++) + if (pp->count > 2) + DrawPoly(encoder, listType, sortTriangles, *pp, pp->first, pp->count); + + [encoder popDebugGroup]; +} + +void MetalDrawer::DrawModVols(id encoder, int first, int count) +{ + if (count == 0 || pvrrc.modtrig.empty() || !config::ModifierVolumes) + return; + + [encoder pushDebugGroup:@"DrawModVols"]; + [encoder setVertexBufferOffset:offsets.modVolOffset atIndex:30]; + + ModifierVolumeParam* params = &pvrrc.global_param_mvo[first]; + + int mod_base = -1; + id state; + id depth_state; + + const std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f }; + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + + for (int cmv = 0; cmv < count; cmv++) { + ModifierVolumeParam& param = params[cmv]; + MTLCullMode cull_mode = param.isp.CullMode == 3 ? MTLCullModeBack : param.isp.CullMode == 2 ? 
MTLCullModeFront : MTLCullModeNone; + [encoder setCullMode:cull_mode]; + [encoder setFrontFacingWinding:MTLWindingCounterClockwise]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + if (mod_base == -1) + mod_base = param.first; + + if (!param.isp.VolumeLast && mv_mode > 0) { + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); // OR'ing (open volume or quad) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); + } else { + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); // XOR'ing (closed volume) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); + } + + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:2]; + MTLScissorRect scissorRect {}; + SetTileClip(encoder, param.tileclip, scissorRect); + // TODO inside clipping + + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart:param.first * 3 + vertexCount:param.count * 3]; + + if (mv_mode == 1 || mv_mode == 2) + { + // Sum the area + state = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); + depth_state = pipelineManager->GetModVolDepthStencilStates(mv_mode == 1 ? 
ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:1]; + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart: mod_base * 3 + vertexCount: (param.first + param.count - mod_base) * 3]; + mod_base = -1; + } + } + [encoder setVertexBufferOffset:0 atIndex:30]; + + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, 0, false); + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Final, 0, false); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setStencilReferenceValue:0x81]; + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangleStrip + indexCount:4 + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset]; + + [encoder popDebugGroup]; +} + +void MetalDrawer::UploadMainBuffer(const MetalVertexShaderUniforms &vertexUniforms, const MetalFragmentShaderUniforms &fragmentUniforms) { + MetalBufferPacker packer; + + // Vertex + packer.add(pvrrc.verts.data(), pvrrc.verts.size() * sizeof(decltype(*pvrrc.verts.data()))); + // Modifier Volumes + offsets.modVolOffset = packer.add(pvrrc.modtrig.data(), pvrrc.modtrig.size() * sizeof(decltype(*pvrrc.modtrig.data()))); + // Index + offsets.indexOffset = packer.add(pvrrc.idx.data(), pvrrc.idx.size() * sizeof(decltype(*pvrrc.idx.data()))); + // Uniform buffers + offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); + offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); + + std::vector n2uniforms; + if (settings.platform.isNaomi2()) + { + // packNaomi2Uniforms(packer, offsets, n2uniforms, false); + // offsets.lightsOffset = packNaomi2Lights(packer); + } + + MetalBufferData *buffer = GetMainBuffer(packer.size()); + packer.upload(*buffer); + curMainBuffer = 
buffer->buffer; +} + +bool MetalDrawer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture, id commandBuffer) { + MetalFragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); + dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; + if (dithering) { + switch (pvrrc.fb_W_CTRL.fb_packmode) + { + case 0: // 0555 KRGB 16 bit + case 3: // 1555 ARGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f; + break; + case 1: // 565 RGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f; + fragUniforms.ditherDivisor[1] = 4.f; + break; + case 2: // 4444 ARGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f; + break; + default: + break; + } + fragUniforms.ditherDivisor[3] = 1.f; + } + + currentScissor = MTLScissorRect {}; + + @autoreleasepool { + id renderEncoder = BeginRenderPass(commandBuffer); + [renderEncoder retain]; + + [renderEncoder setFragmentTexture:fogTexture->GetTexture() atIndex:2]; + [renderEncoder setFragmentTexture:paletteTexture->GetTexture() atIndex:3]; + + // Fog sampler + TSP fogTsp = {}; + fogTsp.FilterMode = 1; + fogTsp.ClampU = 1; + fogTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers->GetSampler(fogTsp) atIndex:2]; + + // Palette sampler + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers->GetSampler(palTsp) atIndex:3]; + + setFirstProvokingVertex(pvrrc); + + // Upload vertex and index buffers + MetalVertexShaderUniforms vtxUniforms {}; + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); + + UploadMainBuffer(vtxUniforms, fragUniforms); + + [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; + [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; + [renderEncoder 
setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; + + RenderPass previous_pass {}; + for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { + const RenderPass& current_pass = pvrrc.render_passes[render_pass]; + + DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, + current_pass.op_count - previous_pass.op_count, + current_pass.pt_count - previous_pass.pt_count, + current_pass.tr_count - previous_pass.tr_count, + current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); + DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(renderEncoder, ListType_Punch_Through, false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawModVols(renderEncoder, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); + if (current_pass.autosort) { + if (!config::PerStripSorting) + DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); + else + DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } else { + // TODO: This breaking? 
+ // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } + previous_pass = current_pass; + } + } + + curMainBuffer = nil; + + return !pvrrc.isRTT; +} + +void MetalTextureDrawer::Init(MetalSamplers *samplers, MetalShaders *shaders, MetalTextureCache *textureCache) +{ + MetalDrawer::Init(samplers, MetalPipelineManager(shaders)); + + this->textureCache = textureCache; + + rttPassDescriptor = [[MTLRenderPassDescriptor alloc] init]; +} + +id MetalTextureDrawer::BeginRenderPass(id commandBuffer) { + DEBUG_LOG(RENDERER, "RenderToTexture packmode=%d stride=%d - %d x %d @ %06x", pvrrc.fb_W_CTRL.fb_packmode, pvrrc.fb_W_LINESTRIDE * 8, + pvrrc.fb_X_CLIP.max + 1, pvrrc.fb_Y_CLIP.max + 1, pvrrc.fb_W_SOF1 & VRAM_MASK); + matrices.CalcMatrices(&pvrrc); + + textureAddr = pvrrc.fb_W_SOF1 & VRAM_MASK; + u32 origWidth = pvrrc.getFramebufferWidth(); + u32 origHeight = pvrrc.getFramebufferHeight(); + u32 upscaledWidth = origWidth; + u32 upscaledHeight = origHeight; + u32 widthPow2; + u32 heightPow2; + getRenderToTextureDimensions(upscaledWidth, upscaledHeight, widthPow2, heightPow2); + + if (!depthAttachment || widthPow2 > depthAttachment.width || heightPow2 > depthAttachment.height) + { + MTLTextureDescriptor *depthDescriptor = [[MTLTextureDescriptor alloc] init]; + depthDescriptor.width = widthPow2; + depthDescriptor.height = heightPow2; + depthDescriptor.pixelFormat = MTLPixelFormatDepth32Float_Stencil8; + depthDescriptor.usage = MTLTextureUsageRenderTarget; + depthDescriptor.storageMode = MTLStorageModePrivate; + + depthAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDescriptor]; + [depthAttachment setLabel:@"Rtt Depth Attachment"]; + } + + id colorImage; + + if (!config::RenderToTextureBuffer) + { + texture = textureCache->getRTTexture(textureAddr, pvrrc.fb_W_CTRL.fb_packmode, origWidth, origHeight); + + // Check if we need to recreate the texture + bool needsRecreation = 
!texture->GetTexture() || + texture->GetTexture().width != widthPow2 || + texture->GetTexture().height != heightPow2; + + if (needsRecreation) + { + MTLTextureDescriptor *colorDescriptor = [[MTLTextureDescriptor alloc] init]; + colorDescriptor.width = widthPow2; + colorDescriptor.height = heightPow2; + colorDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + colorDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + colorDescriptor.storageMode = MTLStorageModePrivate; + + id newTexture = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:colorDescriptor]; + [newTexture setLabel:@"Rtt Color Attachment"]; + texture->SetTexture(newTexture, widthPow2, heightPow2); + } + colorImage = texture->GetTexture(); + } + else + { + if (!colorAttachment || widthPow2 > colorAttachment.width || heightPow2 > colorAttachment.height) + { + MTLTextureDescriptor *colorDescriptor = [[MTLTextureDescriptor alloc] init]; + colorDescriptor.width = widthPow2; + colorDescriptor.height = heightPow2; + colorDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + colorDescriptor.usage = MTLTextureUsageRenderTarget; + colorDescriptor.storageMode = MTLStorageModePrivate; + + colorAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:colorDescriptor]; + [colorAttachment setLabel:@"Rtt Color Attachment"]; + } + colorImage = colorAttachment; + } + + auto colorAttachmentDesc = rttPassDescriptor.colorAttachments[0]; + [colorAttachmentDesc setTexture:colorImage]; + [colorAttachmentDesc setLoadAction:MTLLoadActionClear]; + [colorAttachmentDesc setStoreAction:MTLStoreActionStore]; + [colorAttachmentDesc setClearColor:MTLClearColorMake(0.0, 0.0, 0.0, 1.0)]; + + auto depthAttachmentDesc = rttPassDescriptor.depthAttachment; + [depthAttachmentDesc setTexture:depthAttachment]; + [depthAttachmentDesc setLoadAction:MTLLoadActionClear]; + [depthAttachmentDesc setStoreAction:MTLStoreActionDontCare]; + [depthAttachmentDesc setClearDepth:0.0]; + + auto 
stencilAttachmentDesc = rttPassDescriptor.stencilAttachment; + [stencilAttachmentDesc setTexture:depthAttachment]; + [stencilAttachmentDesc setLoadAction:MTLLoadActionClear]; + [stencilAttachmentDesc setStoreAction:MTLStoreActionDontCare]; + [stencilAttachmentDesc setClearStencil:0]; + + currentEncoder = [commandBuffer renderCommandEncoderWithDescriptor:rttPassDescriptor]; + [currentEncoder pushDebugGroup:@"RenderToTexture"]; + + MTLViewport viewport = { + 0.0, + 0.0, + (double)upscaledWidth, + (double)upscaledHeight, + 1.0, + 0.0 + }; + [currentEncoder setViewport:viewport]; + + u32 minX = pvrrc.getFramebufferMinX() * upscaledWidth / origWidth; + u32 minY = pvrrc.getFramebufferMinY() * upscaledHeight / origHeight; + getRenderToTextureDimensions(minX, minY, widthPow2, heightPow2); + baseScissor = MTLScissorRect { minX, minY, upscaledWidth, upscaledHeight }; + [currentEncoder setScissorRect:baseScissor]; + + return currentEncoder; +} + +void MetalTextureDrawer::EndRenderPass() +{ + [currentEncoder popDebugGroup]; + [currentEncoder endEncoding]; + currentEncoder = nil; + + u32 clippedWidth = pvrrc.getFramebufferWidth(); + u32 clippedHeight = pvrrc.getFramebufferHeight(); + + if (config::RenderToTextureBuffer) + { + + } + + if (config::RenderToTextureBuffer) + { + u16 *dst = (u16 *)&vram[textureAddr]; + + PixelBuffer tmpBuf; + tmpBuf.init(clippedWidth, clippedHeight); + // TODO: WRITE TO BUFFER + WriteTextureToVRam(clippedWidth, clippedHeight, (u8 *)tmpBuf.data(), dst, pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8); + } + else + { + + } + + MetalDrawer::EndRenderPass(); +} + +void MetalScreenDrawer::Init(MetalSamplers *samplers, MetalShaders *shaders, const MTLViewport &viewport) { + emulateFramebuffer = config::EmulateFramebuffer; + this->shaderManager = shaders; + + if (this->viewport.height != viewport.height || + this->viewport.width != viewport.width || + this->viewport.originX != viewport.originX || + this->viewport.originY != viewport.originY || + 
this->viewport.zfar != viewport.zfar || + this->viewport.znear != viewport.znear) { + depthAttachment = nil; + framebuffers.clear(); + clearPassDescriptors.clear(); + loadPassDescriptors.clear(); + clearNeeded.clear(); + } + this->viewport = viewport; + + if (depthAttachment == nil) + { + MTLTextureDescriptor *descriptor = [[MTLTextureDescriptor alloc] init]; + descriptor.width = viewport.width; + descriptor.height = viewport.height; + descriptor.pixelFormat = MTLPixelFormatDepth32Float_Stencil8; + descriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + depthAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:descriptor]; + } + + if (framebuffers.size() > 3) + { + framebuffers.resize(3); + loadPassDescriptors.resize(3); + clearPassDescriptors.resize(3); + clearNeeded.resize(3); + } + else + { + while (framebuffers.size() < 3) + { + MTLTextureDescriptor *texDescriptor = [[MTLTextureDescriptor alloc] init]; + texDescriptor.width = viewport.width; + texDescriptor.height = viewport.height; + texDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + texDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + id colorAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:texDescriptor]; + framebuffers.push_back(colorAttachment); + + MTLRenderPassDescriptor *passDescriptor = [[MTLRenderPassDescriptor alloc] init]; + auto depth = passDescriptor.depthAttachment; + [depth setTexture:depthAttachment]; + [depth setLoadAction:MTLLoadActionClear]; + [depth setStoreAction:MTLStoreActionDontCare]; + + auto stencil = passDescriptor.stencilAttachment; + [stencil setTexture:depthAttachment]; + [stencil setLoadAction:MTLLoadActionClear]; + [stencil setStoreAction:MTLStoreActionDontCare]; + + auto color = passDescriptor.colorAttachments[0]; + [color setTexture:colorAttachment]; + [color setLoadAction:MTLLoadActionLoad]; + [color setStoreAction:MTLStoreActionStore]; + + 
loadPassDescriptors.push_back(passDescriptor); + + MTLRenderPassDescriptor *clearPassDescriptor = [passDescriptor copy]; + [clearPassDescriptor.colorAttachments[0] setLoadAction:MTLLoadActionClear]; + + clearPassDescriptors.push_back(clearPassDescriptor); + clearNeeded.push_back(true); + } + } + frameRendered = false; + + MetalDrawer::Init(samplers, MetalPipelineManager(shaderManager)); +} + +id MetalScreenDrawer::BeginRenderPass(id commandBuffer) { + if (!renderPassStarted) + { + frameRendered = false; + + MTLRenderPassDescriptor* passDescriptor = clearNeeded[GetCurrentImage()] || pvrrc.clearFramebuffer ? clearPassDescriptors[GetCurrentImage()] : loadPassDescriptors[GetCurrentImage()]; + clearNeeded[GetCurrentImage()] = false; + currentEncoder = [commandBuffer renderCommandEncoderWithDescriptor:passDescriptor]; + renderPassStarted = true; + } + + [currentEncoder setViewport:viewport]; + + matrices.CalcMatrices(&pvrrc, viewport.width, viewport.height); + + SetBaseScissor(viewport); + [currentEncoder setScissorRect:baseScissor]; + + return currentEncoder; +} + +void MetalScreenDrawer::EndRenderPass() { + if (!renderPassStarted) + return; + + [currentEncoder endEncoding]; + currentEncoder = nil; + + if (emulateFramebuffer) + { + + } + else + { + aspectRatio = getOutputFramebufferAspectRatio(); + } + MetalDrawer::EndRenderPass(); + + frameRendered = true; +} \ No newline at end of file diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index 3f8ee472b2..091ce5b2a9 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -39,31 +39,40 @@ class MetalDriver final : public ImGuiDriver { void newFrame() override { MetalContext *context = MetalContext::Instance(); - drawable = [context->GetLayer() nextDrawable]; - descriptor = [[MTLRenderPassDescriptor alloc] init]; - [descriptor setDefaultRasterSampleCount:1]; + // Use existing descriptor and encoder when available + if (context->GetDescriptor() != nullptr) { + 
ImGui_ImplMetal_NewFrame(MetalContext::Instance()->GetDescriptor()); + } else { + drawable = [context->GetLayer() nextDrawable]; + descriptor = [[MTLRenderPassDescriptor alloc] init]; - auto color = [descriptor colorAttachments][0]; - [color setClearColor:MTLClearColorMake(0.f, 0.f, 0.f, 1.f)]; - [color setTexture:[drawable texture]]; - [color setLoadAction:MTLLoadActionClear]; - [color setStoreAction:MTLStoreActionStore]; + [descriptor setDefaultRasterSampleCount:1]; - ImGui_ImplMetal_NewFrame(descriptor); + auto color = [descriptor colorAttachments][0]; + [color setClearColor:MTLClearColorMake(0.f, 0.f, 0.f, 1.f)]; + [color setTexture:[drawable texture]]; + [color setLoadAction:MTLLoadActionClear]; + [color setStoreAction:MTLStoreActionStore]; + + ImGui_ImplMetal_NewFrame(descriptor); + } } void renderDrawData(ImDrawData *drawData, bool gui_open) override { MetalContext *context = MetalContext::Instance(); - id buffer = [context->GetQueue() commandBuffer]; - id commandEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - - ImGui_ImplMetal_RenderDrawData(drawData, buffer, commandEncoder); - [commandEncoder endEncoding]; - commandEncoder = nil; - [buffer presentDrawable:drawable]; - [buffer commit]; + if (context->GetCommandBuffer() != nil && context->GetEncoder() != nil) { /* reuse the context's pass only when both its command buffer and encoder exist */ + ImGui_ImplMetal_RenderDrawData(drawData, context->GetCommandBuffer(), context->GetEncoder()); + } else { + id buffer = [context->GetQueue() commandBuffer]; + id commandEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; + ImGui_ImplMetal_RenderDrawData(drawData, buffer, commandEncoder); + + [commandEncoder endEncoding]; + [buffer presentDrawable:drawable]; + [buffer commit]; + } if (gui_open) frameRendered = true; diff --git a/core/rend/metal/metal_pipeline.h b/core/rend/metal/metal_pipeline.h index d460e1b34e..df7dcfebe3 100644 --- a/core/rend/metal/metal_pipeline.h +++ b/core/rend/metal/metal_pipeline.h @@ -24,6 +24,7 @@ #include "cfg/option.h" #include 
"hw/pvr/ta_ctx.h" +#include "metal_shaders.h" class MetalRenderer; @@ -32,7 +33,7 @@ enum class ModVolMode { Xor, Or, Inclusion, Exclusion, Final }; class MetalPipelineManager { public: - explicit MetalPipelineManager(MetalRenderer *renderer); + explicit MetalPipelineManager(MetalShaders *shaderManager); virtual ~MetalPipelineManager() = default; void term() @@ -251,7 +252,6 @@ class MetalPipelineManager } } - MetalRenderer *renderer; id blitPassPipeline = nil; std::map> pipelines; std::map> modVolPipelines; @@ -260,6 +260,9 @@ class MetalPipelineManager std::map> modVolStencilStates; std::map> depthStencilStates; std::map> depthPassDepthStencilStates; + +protected: + MetalShaders *shaderManager; }; static const MTLCompareFunction depthOps[] = diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index a8d41826c5..af399797dd 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -20,11 +20,10 @@ #include "metal_pipeline.h" #include "metal_context.h" -#include "metal_shaders.h" #include "metal_renderer.h" -MetalPipelineManager::MetalPipelineManager(MetalRenderer *renderer) { - this->renderer = renderer; +MetalPipelineManager::MetalPipelineManager(MetalShaders *shaderManager) { + this->shaderManager = shaderManager; } void MetalPipelineManager::CreateBlitPassPipeline() { @@ -32,10 +31,10 @@ [descriptor setLabel:@"Blit Pass"]; auto attachment = [descriptor colorAttachments][0]; - [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; - [descriptor setVertexFunction:renderer->GetShaders()->GetBlitVertexShader()]; - [descriptor setFragmentFunction:renderer->GetShaders()->GetBlitFragmentShader()]; + [descriptor setVertexFunction:shaderManager->GetBlitVertexShader()]; + [descriptor setFragmentFunction:shaderManager->GetBlitFragmentShader()]; NSError *error = nil; auto state = [MetalContext::Instance()->GetDevice() 
newRenderPipelineStateWithDescriptor:descriptor error:&error]; @@ -79,14 +78,14 @@ [attachment setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; [attachment setWriteMask:mode != ModVolMode::Final ? MTLColorWriteMaskNone : MTLColorWriteMaskAll]; - [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; - ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; - [descriptor setVertexFunction:renderer->GetShaders()->GetModVolVertexShader(shaderParams)]; - [descriptor setFragmentFunction:renderer->GetShaders()->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)]; + MetalModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + [descriptor setVertexFunction:shaderManager->GetModVolVertexShader(shaderParams)]; + [descriptor setFragmentFunction:shaderManager->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)]; NSError *error = nil; auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; @@ -145,19 +144,19 @@ [attachment setDestinationAlphaBlendFactor:GetBlendFactor(dst, false)]; [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; [attachment setWriteMask:MTLColorWriteMaskAll]; - [attachment setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; - 
VertexShaderParams vertParams = {}; + MetalVertexShaderParams vertParams = {}; vertParams.gouraud = pp.pcw.Gouraud == 1; vertParams.naomi2 = pp.isNaomi2(); vertParams.divPosZ = divPosZ; - FragmentShaderParams fragParams = {}; + MetalFragmentShaderParams fragParams = {}; fragParams.alphaTest = listType == ListType_Punch_Through; fragParams.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; fragParams.clamping = pp.tsp.ColorClamp; @@ -174,8 +173,8 @@ fragParams.divPosZ = divPosZ; fragParams.dithering = dithering; - [descriptor setVertexFunction:renderer->GetShaders()->GetVertexShader(vertParams)]; - [descriptor setFragmentFunction:renderer->GetShaders()->GetFragmentShader(fragParams)]; + [descriptor setVertexFunction:shaderManager->GetVertexShader(vertParams)]; + [descriptor setFragmentFunction:shaderManager->GetFragmentShader(fragParams)]; NSError *error = nil; auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; diff --git a/core/rend/metal/metal_quad.h b/core/rend/metal/metal_quad.h new file mode 100644 index 0000000000..c831d9cdc4 --- /dev/null +++ b/core/rend/metal/metal_quad.h @@ -0,0 +1,115 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once + +#include +#include "metal_shaders.h" +#include "metal_buffer.h" + +struct MetalQuadVertex +{ + float x, y, z; + float u, v; +}; + +class MetalQuadBuffer +{ +public: + MetalQuadBuffer() + { + buffer = std::make_unique(sizeof(MetalQuadVertex) * 4); + } + + void Bind(id commandEncoder) + { + [commandEncoder setVertexBuffer:buffer->buffer offset:0 atIndex:0]; + } + + void Draw(id commandEncoder) + { + [commandEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + } + + void Update(MetalQuadVertex vertices[4] = nullptr) + { + if (vertices == nullptr) + { + static MetalQuadVertex defaultVtx[4] + { + { -1.f, -1.f, 0.f, 0.f, 1.f }, + { 1.f, -1.f, 0.f, 1.f, 1.f }, + { -1.f, 1.f, 0.f, 0.f, 0.f }, + { 1.f, 1.f, 0.f, 1.f, 0.f }, + }; + vertices = defaultVtx; + }; + + memcpy([buffer->buffer contents], vertices, sizeof(MetalQuadVertex) * 4); + } +private: + std::unique_ptr buffer; +}; + +class MetalQuadPipeline +{ +public: + MetalQuadPipeline(bool ignoreTexAlpha, bool rotate = false) + : rotate(rotate), ignoreTexAlpha(ignoreTexAlpha) {} + void Init(MetalShaders *shaderManager); + void Term() { + linearSampler = nil; + nearestSampler = nil; + } + void BindPipeline(id commandEncoder) { [commandEncoder setRenderPipelineState:GetPipeline()]; } + + id GetLinearSampler() { return linearSampler; } + id GetNearestSampler() { return nearestSampler; } +private: + id GetPipeline() { + if (!pipeline) + CreatePipeline(); + return pipeline; + } + void CreatePipeline(); + + + id pipeline; + id linearSampler; + id nearestSampler; + MetalShaders *shaderManager = nullptr; + bool rotate; + bool ignoreTexAlpha; +}; + +class MetalQuadDrawer +{ +public: + MetalQuadDrawer() = default; + MetalQuadDrawer(MetalQuadDrawer &&) = default; + MetalQuadDrawer(const MetalQuadDrawer &) = delete; + MetalQuadDrawer& operator=(MetalQuadDrawer &&) = default; + MetalQuadDrawer& operator=(const MetalQuadDrawer &) = delete; + + void Init(MetalQuadPipeline *pipeline); 
+ void Draw(id commandEncoder, id texture, MetalQuadVertex vertices[4] = nullptr, bool nearestFilter = false, const float *color = nullptr); +private: + MetalQuadPipeline *pipeline = nullptr; + std::unique_ptr buffer; +}; diff --git a/core/rend/metal/metal_quad.mm b/core/rend/metal/metal_quad.mm new file mode 100644 index 0000000000..797d7cadac --- /dev/null +++ b/core/rend/metal/metal_quad.mm @@ -0,0 +1,119 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#include "metal_quad.h" +#import "metal_context.h" + +void MetalQuadPipeline::CreatePipeline() +{ + MTLRenderPipelineDescriptor *pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [pipelineDescriptor setVertexFunction:shaderManager->GetQuadVertexShader(rotate)]; + [pipelineDescriptor setFragmentFunction:shaderManager->GetQuadFragmentShader(ignoreTexAlpha)]; + + [pipelineDescriptor setInputPrimitiveTopology:MTLPrimitiveTopologyClassTriangle]; + + auto color = pipelineDescriptor.colorAttachments[0]; + [color setBlendingEnabled:TRUE]; + [color setSourceRGBBlendFactor:MTLBlendFactorSourceAlpha]; + [color setDestinationRGBBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [color setRgbBlendOperation:MTLBlendOperationAdd]; + [color setSourceAlphaBlendFactor:MTLBlendFactorSourceAlpha]; + [color setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [color setAlphaBlendOperation:MTLBlendOperationAdd]; + [color setWriteMask:MTLColorWriteMaskAll]; + [color setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + MTLVertexDescriptor *vertexDescriptor = [[MTLVertexDescriptor alloc] init]; + auto pos = vertexDescriptor.attributes[0]; + [pos setFormat:MTLVertexFormatFloat3]; + [pos setBufferIndex:0]; + [pos setOffset:offsetof(MetalQuadVertex, x)]; + + auto uv = vertexDescriptor.attributes[1]; + [uv setFormat:MTLVertexFormatFloat2]; + [uv setBufferIndex:0]; + [uv setOffset:offsetof(MetalQuadVertex, u)]; + + auto layout = vertexDescriptor.layouts[0]; + [layout setStride:sizeof(MetalQuadVertex)]; + + [pipelineDescriptor setVertexDescriptor:vertexDescriptor]; + + NSError *error = nil; + pipeline = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error]; + + if (pipeline == nil) + { + ERROR_LOG(RENDERER, "Failed to create quad pipeline: %s", [[error localizedDescription] UTF8String]); + } +} + +void MetalQuadPipeline::Init(MetalShaders *shaderManager) +{ + this->shaderManager = shaderManager; + if 
(linearSampler == nil) + { + MTLSamplerDescriptor *samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; + [samplerDescriptor setMinFilter:MTLSamplerMinMagFilterLinear]; + [samplerDescriptor setMagFilter:MTLSamplerMinMagFilterLinear]; + [samplerDescriptor setMipFilter:MTLSamplerMipFilterLinear]; + [samplerDescriptor setSAddressMode:MTLSamplerAddressModeClampToEdge]; + [samplerDescriptor setTAddressMode:MTLSamplerAddressModeClampToEdge]; + [samplerDescriptor setRAddressMode:MTLSamplerAddressModeClampToEdge]; + linearSampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:samplerDescriptor]; + } + if (nearestSampler == nil) + { + MTLSamplerDescriptor *samplerDescriptor = [[MTLSamplerDescriptor alloc] init]; + [samplerDescriptor setMinFilter:MTLSamplerMinMagFilterNearest]; + [samplerDescriptor setMagFilter:MTLSamplerMinMagFilterNearest]; + [samplerDescriptor setMipFilter:MTLSamplerMipFilterNearest]; + [samplerDescriptor setSAddressMode:MTLSamplerAddressModeClampToEdge]; + [samplerDescriptor setTAddressMode:MTLSamplerAddressModeClampToEdge]; + [samplerDescriptor setRAddressMode:MTLSamplerAddressModeClampToEdge]; + nearestSampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:samplerDescriptor]; + } +} + +void MetalQuadDrawer::Init(MetalQuadPipeline *pipeline) { + this->pipeline = pipeline; + buffer = std::make_unique(); +} + +void MetalQuadDrawer::Draw(id commandEncoder, id texture, MetalQuadVertex *vertices, bool nearestFilter, const float *color) +{ + pipeline->BindPipeline(commandEncoder); + buffer->Update(vertices); + buffer->Bind(commandEncoder); + + if (texture != nil) + { + [commandEncoder setFragmentTexture:texture atIndex:0]; + [commandEncoder setFragmentSamplerState:nearestFilter ? 
pipeline->GetNearestSampler() : pipeline->GetLinearSampler() atIndex:0]; + } + + if (color == nullptr) + { + static float fullWhite[] { 1.f, 1.f, 1.f, 1.f }; + color = fullWhite; + } + + [commandEncoder setFragmentBytes:color length:sizeof(float) * 4 atIndex:0]; + buffer->Draw(commandEncoder); +} \ No newline at end of file diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index 405a3a61e3..dfc520f45e 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -22,126 +22,54 @@ #include "metal_shaders.h" #include "metal_texture.h" #include "metal_buffer.h" -#include "hw/pvr/Renderer_if.h" +#include "metal_drawer.h" +#include "hw/pvr/Renderer_if.h" #include "rend/tileclip.h" #include "rend/transform_matrix.h" -class MetalRenderer final : public Renderer +class BaseMetalRenderer : public Renderer { +protected: + bool BaseInit(id commandEncoder); + public: - bool Init() override; void Term() override; + BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override; void Process(TA_context* ctx) override; - bool Render() override; + void ReInitOSD(); void RenderFramebuffer(const FramebufferInfo& info) override; - MetalShaders* GetShaders() { return &shaders; } - BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override;; - -private: - bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture); - void DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count); - void DrawSorted(id encoder, const std::vector& polys, u32 first, u32 last, bool multipass); - void DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector& polys, u32 first, u32 last); - void DrawModVols(id encoder, int first, int count); - void UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const FragmentShaderUniforms& fragmentUniforms); - void EndRenderPass(); - -protected: - TileClipping SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect); - void 
SetBaseScissor(MTLViewport viewport); + void WaitIdle(); - void SetScissor(id encoder, const MTLScissorRect& scissor) - { - if (scissor.x != currentScissor.x || - scissor.y != currentScissor.y || - scissor.width != currentScissor.width || - scissor.height != currentScissor.height) - { - [encoder setScissorRect:scissor]; - currentScissor = scissor; - } + bool RenderLastFrame() override { + return !clearLastFrame; } - MetalBufferData* GetMainBuffer(u32 size) - { - if (mainBuffer != nullptr) - { - if (mainBuffer->bufferSize < size) { - u32 newSize = (u32)mainBuffer->bufferSize; - while (newSize < size) - newSize *= 2; - - INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", mainBuffer->bufferSize, newSize); - [mainBuffer->buffer setPurgeableState: MTLPurgeableStateEmpty]; - - mainBuffer = new MetalBufferData(newSize); - } - } - else - { - mainBuffer = new MetalBufferData(std::max(512 * 1024u, size)); - } - - return mainBuffer; + bool GetLastFrame(std::vector& data, int& width, int& height) override { + // return MetalContext::Instance()->GetLastFrame(data, width, height); + return false; } - template - T MakeFragmentUniforms() +protected: + virtual void resize(int w, int h) { - T fragUniforms; - - //VERT and RAM fog color constants - FOG_COL_VERT.getRGBColor(fragUniforms.sp_FOG_COL_VERT); - FOG_COL_RAM.getRGBColor(fragUniforms.sp_FOG_COL_RAM); - - //Fog density constant - fragUniforms.sp_FOG_DENSITY = FOG_DENSITY.get() * config::ExtraDepthScale; - - pvrrc.fog_clamp_min.getRGBAColor(fragUniforms.colorClampMin); - pvrrc.fog_clamp_max.getRGBAColor(fragUniforms.colorClampMax); - - fragUniforms.cp_AlphaTestValue = (PT_ALPHA_REF & 0xFF) / 255.0f; - - return fragUniforms; + viewport.width = w; + viewport.height = h; } void CheckFogTexture(); void CheckPaletteTexture(); - void WaitIdle(); - - struct { - u64 indexOffset = 0; - u64 modVolOffset = 0; - u64 vertexUniformOffset = 0; - u64 fragmentUniformOffset = 0; - u64 naomi2OpaqueOffset = 0; - u64 
naomi2PunchThroughOffset = 0; - u64 naomi2TranslucentOffset = 0; - u64 naomi2ModVolOffset = 0; - u64 naomi2TrModVolOffset = 0; - u64 lightsOffset = 0; - } offsets; - - bool renderPassStarted = false; + bool presentFramebuffer(); - MTLScissorRect baseScissor {}; - MTLScissorRect currentScissor {}; - TransformMatrix matrices; - - id frameBuffer = nil; - id depthBuffer = nil; - - id curMainBuffer = nil; + MetalShaders shaderManager; + std::unique_ptr fogTexture; + std::unique_ptr paletteTexture; id commandBuffer = nil; id texCommandBuffer = nil; - MetalBufferData* mainBuffer; - MetalPipelineManager pipelineManager = MetalPipelineManager(this); - MetalShaders shaders; + std::vector> framebufferTextures; + int framebufferTexIndex = 0; MetalTextureCache textureCache; - std::unique_ptr fogTexture; - std::unique_ptr paletteTexture; - MetalSamplers samplers; - bool frameRendered = false; - bool dithering = false; + MTLViewport viewport = MTLViewport { 0, 0, 640, 480, 0, 0 }; + bool framebufferRendered = false; }; + diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index b449ece0c2..3be4571b4a 100644 --- a/core/rend/metal/metal_renderer.mm +++ b/core/rend/metal/metal_renderer.mm @@ -23,34 +23,49 @@ #include "metal_renderer.h" #include "hw/aica/dsp.h" #include "hw/pvr/ta.h" -#include "hw/pvr/pvr_mem.h" -#include "rend/sorter.h" -bool MetalRenderer::Init() +bool BaseMetalRenderer::BaseInit(id commandEncoder) { - NOTICE_LOG(RENDERER, "Metal renderer initializing"); - - pipelineManager = MetalPipelineManager(this); - shaders = MetalShaders(); - samplers = MetalSamplers(); - - frameRendered = false; - return true; } -void MetalRenderer::Term() { +void BaseMetalRenderer::Term() +{ WaitIdle(); - pipelineManager.term(); - shaders.term(); - samplers.term(); - fogTexture = nullptr; - paletteTexture = nullptr; + MetalContext::Instance()->PresentFrame(nil, MTLViewport {}, 0); + textureCache.Clear(); + fogTexture = nil; + paletteTexture = nil; + 
framebufferTextures.clear(); + framebufferTexIndex = 0; + shaderManager.term(); +} + +BaseTextureCacheData *BaseMetalRenderer::GetTexture(TSP tsp, TCW tcw) +{ + MetalTexture* tf = textureCache.getTextureCacheData(tsp, tcw); + + if (tf->NeedsUpdate()) { + tf->SetCommandBuffer(texCommandBuffer); + + if (!tf->Update()) { + tf = nullptr; + return nullptr; + } + } + else if (tf->IsCustomTextureAvailable()) { + // TODO + tf->SetCommandBuffer(texCommandBuffer); + } + tf->SetCommandBuffer(nil); + + return tf; } -void MetalRenderer::Process(TA_context *ctx) { +void BaseMetalRenderer::Process(TA_context *ctx) +{ if (!ctx->rend.isRTT) { - frameRendered = false; + framebufferRendered = false; if (!config::EmulateFramebuffer) clearLastFrame = false; } @@ -70,56 +85,51 @@ texCommandBuffer = nil; } -bool MetalRenderer::Render() { - if (pvrrc.isRTT) { - - } - else { - - } - - // TODO: Don't hardcode these values - matrices.CalcMatrices(&pvrrc, 1920, 1080); - Draw(fogTexture.get(), paletteTexture.get()); - // if (config::EmulateFramebuffer || pvrrc.isRTT) - // // delay ending the render pass in case of multi render - // EndRenderPass(); - - return true; -} - -void MetalRenderer::EndRenderPass() { - if (!renderPassStarted) - return; - - frameRendered = true; -} - -void MetalRenderer::RenderFramebuffer(const FramebufferInfo &info) { +void BaseMetalRenderer::ReInitOSD() +{ } -BaseTextureCacheData *MetalRenderer::GetTexture(TSP tsp, TCW tcw) { - MetalTexture* tf = textureCache.getTextureCacheData(tsp, tcw); +void BaseMetalRenderer::RenderFramebuffer(const FramebufferInfo &info) +{ + framebufferTexIndex = (framebufferTexIndex + 1) % 3; - if (tf->NeedsUpdate()) { - tf->SetCommandBuffer(texCommandBuffer); + if (framebufferTextures.size() != 3) + framebufferTextures.resize(3); - if (!tf->Update()) { - tf = nullptr; - return nullptr; - } + std::unique_ptr& curTexture = framebufferTextures[framebufferTexIndex]; + if (!curTexture) + { + curTexture = std::make_unique(); + curTexture->tex_type 
= TextureType::_8888; } - else if (tf->IsCustomTextureAvailable()) { - // TODO - tf->SetCommandBuffer(texCommandBuffer); + if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) + { + // Video output disabled + u8 rgba[]{ (u8)info.vo_border_col._red, (u8)info.vo_border_col._green, (u8)info.vo_border_col._blue, 255 }; + curTexture->UploadToGPU(1, 1, rgba, false); } - tf->SetCommandBuffer(nil); + else + { + PixelBuffer pb; + int width; + int height; + ReadFramebuffer(info, pb, width, height); - return tf; + curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); + } + + framebufferRendered = true; + clearLastFrame = false; +} + +void BaseMetalRenderer::WaitIdle() +{ + [commandBuffer waitUntilCompleted]; + commandBuffer = nil; } -void MetalRenderer::CheckFogTexture() { +void BaseMetalRenderer::CheckFogTexture() { if (!fogTexture) { fogTexture = std::make_unique(); @@ -137,7 +147,7 @@ fogTexture->SetCommandBuffer(nil); } -void MetalRenderer::CheckPaletteTexture() { +void BaseMetalRenderer::CheckPaletteTexture() { if (!paletteTexture) { paletteTexture = std::make_unique(); @@ -152,489 +162,123 @@ paletteTexture->SetCommandBuffer(nil); } -void MetalRenderer::WaitIdle() { - [commandBuffer waitUntilCompleted]; - commandBuffer = nil; -} - -TileClipping MetalRenderer::SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect) { - int rect[4] = {}; - TileClipping clipMode = GetTileClip(val, matrices.GetViewportMatrix(), rect); - if (clipMode != TileClipping::Off) - { - clipRect.x = rect[0]; - clipRect.y = rect[1]; - clipRect.width = rect[2]; - clipRect.height = rect[3]; - } - if (clipMode == TileClipping::Outside) - SetScissor(encoder, clipRect); - else - SetScissor(encoder, baseScissor); - - return clipMode; -} +bool BaseMetalRenderer::presentFramebuffer() +{ + if (framebufferTexIndex >= (int)framebufferTextures.size()) + return false; + MetalTexture *fbTexture = framebufferTextures[framebufferTexIndex].get(); + if (fbTexture == nullptr) + return 
false; + MTLViewport viewport = { 0, 0, (float)fbTexture->GetTexture().width, (float)fbTexture->GetTexture().height, 1.0, 0 }; -void MetalRenderer::SetBaseScissor(MTLViewport viewport) { - bool wide_screen_on = config::Widescreen - && !matrices.IsClipped() && !config::Rotate90 && !config::EmulateFramebuffer; - if (!wide_screen_on) - { - float width; - float height; - float min_x; - float min_y; - glm::vec4 clip_min(pvrrc.fb_X_CLIP.min, pvrrc.fb_Y_CLIP.min, 0, 1); - glm::vec4 clip_dim(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1, - pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1, 0, 0); - clip_min = matrices.GetScissorMatrix() * clip_min; - clip_dim = matrices.GetScissorMatrix() * clip_dim; - - min_x = clip_min[0]; - min_y = clip_min[1]; - width = clip_dim[0]; - height = clip_dim[1]; - if (width < 0) - { - min_x += width; - width = -width; - } - if (height < 0) - { - min_y += height; - height = -height; - } - - baseScissor = MTLScissorRect(); - baseScissor.x = std::max(lroundf(min_x), 0L); - baseScissor.y = std::max(lroundf(min_y), 0L); - baseScissor.width = std::max(lroundf(width), 0L); - baseScissor.height = std::max(lroundf(height), 0L); - } - else - { - baseScissor = MTLScissorRect(); - baseScissor.x = 0; - baseScissor.y = 0; - baseScissor.width = viewport.width; - baseScissor.height = viewport.height; - } + MetalContext::Instance()->PresentFrame(fbTexture->GetTexture(), viewport, + getDCFramebufferAspectRatio()); + return true; } -void MetalRenderer::DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam &poly, u32 first, u32 count) +class MetalRenderer final : public BaseMetalRenderer { - MTLScissorRect scissorRect {}; - TileClipping tileClip = SetTileClip(encoder, poly.tileclip, scissorRect); - - float trilinearAlpha = 1.0f; - if (poly.tsp.FilterMode > 1 && poly.pcw.Texture && listType != ListType_Punch_Through && poly.tcw.MipMapped == 1) - { - trilinearAlpha = 0.25f * (poly.tsp.MipMapD & 0x3); - if (poly.tsp.FilterMode == 2) - // Trilinear 
pass A - trilinearAlpha = 1.0f - trilinearAlpha; - } - int gpuPalette = poly.texture == nullptr || !poly.texture->gpuPalette ? 0 - : poly.tsp.FilterMode + 1; - float palette_index = 0.0f; - if (gpuPalette != 0) +public: + bool Init() { - if (config::TextureFiltering == 1) - gpuPalette = 1; - else if (config::TextureFiltering == 2) - gpuPalette = 2; - if (poly.tcw.PixelFmt == PixelPal4) - palette_index = float(poly.tcw.PalSelect << 4) / 1023.0f; - else - palette_index = float(poly.tcw.PalSelect >> 4 << 8) / 1023.0f; - } + NOTICE_LOG(RENDERER, "MetalRenderer::Init"); - std::array pushConstants; + textureDrawer.Init(&samplerManager, &shaderManager, &textureCache); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); + // BaseInit(screenDrawer.GetRenderPass()); + emulateFramebuffer = config::EmulateFramebuffer; - if (tileClip == TileClipping::Inside || trilinearAlpha != 1.0f || gpuPalette != 0) - { - pushConstants = { - (float)scissorRect.x, - (float)scissorRect.y, - (float)scissorRect.x + (float)scissorRect.width, - (float)scissorRect.y + (float)scissorRect.height, - trilinearAlpha, - palette_index - }; - } else { - pushConstants = { 0, 0, 0, 0, 0, 0 }; + return true; } - [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; - - bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; - - [encoder setRenderPipelineState:pipelineManager.GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; - [encoder setDepthStencilState:pipelineManager.GetDepthStencilStates(listType, sortTriangles, shadowed, poly)]; - - if (shadowed) { - if (poly.pcw.Shadow != 0) { - [encoder setStencilReferenceValue:0x80]; - } else { - [encoder setStencilReferenceValue:0]; - } - } - - if (poly.texture != nullptr) { - auto texture = ((MetalTexture *)poly.texture)->GetTexture(); - [encoder setFragmentTexture:texture atIndex:0]; - - // Texture sampler - [encoder 
setFragmentSamplerState:samplers.GetSampler(poly, listType == ListType_Punch_Through) atIndex:0]; - } - - if (poly.pcw.Texture || poly.isNaomi2()) + void Term() { - u32 index = 0; - if (poly.isNaomi2()) - { - - } - - // TODO: Bind Texture & Naomi2 Lights Buffers + NOTICE_LOG(RENDERER, "MetalRenderer::Term"); + WaitIdle(); + screenDrawer.Term(); + textureDrawer.Term(); + samplerManager.term(); + BaseMetalRenderer::Term(); } - MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; - - [encoder drawIndexedPrimitives:primitive - indexCount:count - indexType:MTLIndexTypeUInt32 - indexBuffer:curMainBuffer - indexBufferOffset:offsets.indexOffset + first * sizeof(u32)]; -} - -void MetalRenderer::DrawSorted(id encoder, const std::vector &polys, u32 first, u32 last, bool multipass) -{ - if (first == last) - return; - - [encoder pushDebugGroup:@"DrawSorted"]; - - for (u32 idx = first; idx < last; idx++) - DrawPoly(encoder, ListType_Translucent, true, pvrrc.global_param_tr[polys[idx].polyIndex], polys[idx].first, polys[idx].count); - if (multipass && config::TranslucentPolygonDepthMask) + void Process(TA_context* ctx) override { - // Write to the depth buffer now. The next render pass might need it. 
(Cosmic Smash) - for (u32 idx = first; idx < last; idx++) + if (emulateFramebuffer != config::EmulateFramebuffer) { - const SortedTriangle& param = polys[idx]; - const PolyParam& polyParam = pvrrc.global_param_tr[param.polyIndex]; - if (polyParam.isp.ZWriteDis) - continue; - [encoder setRenderPipelineState:pipelineManager.GetDepthPassPipeline(polyParam.isp.CullMode, polyParam.isNaomi2())]; - [encoder setDepthStencilState:pipelineManager.GetDepthPassDepthStencilStates(polyParam.isp.CullMode, polyParam.isNaomi2())]; - MTLScissorRect scissorRect {}; - SetTileClip(encoder, polyParam.tileclip, scissorRect); - [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle - indexCount:param.count - indexType:MTLIndexTypeUInt32 - indexBuffer:curMainBuffer - indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32)]; - } - } - - [encoder popDebugGroup]; -} - -void MetalRenderer::DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector &polys, u32 first, u32 last) -{ - if (first == last) - return; - - [encoder pushDebugGroup:@"DrawList"]; - - const PolyParam *pp_end = polys.data() + last; - for (const PolyParam *pp = &polys[first]; pp != pp_end; pp++) - if (pp->count > 2) - DrawPoly(encoder, listType, sortTriangles, *pp, pp->first, pp->count); - - [encoder popDebugGroup]; -} - -void MetalRenderer::DrawModVols(id encoder, int first, int count) -{ - if (count == 0 || pvrrc.modtrig.empty() || !config::ModifierVolumes) - return; - - [encoder pushDebugGroup:@"DrawModVols"]; - [encoder setVertexBufferOffset:offsets.modVolOffset atIndex:30]; - - ModifierVolumeParam* params = &pvrrc.global_param_mvo[first]; - - int mod_base = -1; - id state; - id depth_state; - - const std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f }; - [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; - - for (int cmv = 0; cmv < count; cmv++) { - ModifierVolumeParam& param = 
params[cmv]; - MTLCullMode cull_mode = param.isp.CullMode == 3 ? MTLCullModeBack : param.isp.CullMode == 2 ? MTLCullModeFront : MTLCullModeNone; - [encoder setCullMode:cull_mode]; - [encoder setFrontFacingWinding:MTLWindingCounterClockwise]; - - if (param.count == 0) - continue; - - u32 mv_mode = param.isp.DepthMode; - - if (mod_base == -1) - mod_base = param.first; - - if (!param.isp.VolumeLast && mv_mode > 0) { - state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); // OR'ing (open volume or quad) - depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); - } else { - state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); // XOR'ing (closed volume) - depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); + screenDrawer.EndRenderPass(); + WaitIdle(); + screenDrawer.Term(); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); + // BaseInit(screenDrawer.GetRenderPass()); + emulateFramebuffer = config::EmulateFramebuffer; } - - [encoder setRenderPipelineState:state]; - [encoder setDepthStencilState:depth_state]; - [encoder setStencilReferenceValue:2]; - MTLScissorRect scissorRect {}; - SetTileClip(encoder, param.tileclip, scissorRect); - // TODO inside clipping - - [encoder drawPrimitives:MTLPrimitiveTypeTriangle - vertexStart:param.first * 3 - vertexCount:param.count * 3]; - - if (mv_mode == 1 || mv_mode == 2) - { - // Sum the area - state = pipelineManager.GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); - depth_state = pipelineManager.GetModVolDepthStencilStates(mv_mode == 1 ? 
ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); - [encoder setRenderPipelineState:state]; - [encoder setDepthStencilState:depth_state]; - [encoder setStencilReferenceValue:1]; - [encoder drawPrimitives:MTLPrimitiveTypeTriangle - vertexStart: mod_base * 3 - vertexCount: (param.first + param.count - mod_base) * 3]; - mod_base = -1; + else if (ctx->rend.isRTT) { + screenDrawer.EndRenderPass(); } + BaseMetalRenderer::Process(ctx); } - [encoder setVertexBufferOffset:0 atIndex:30]; - - state = pipelineManager.GetModifierVolumePipeline(ModVolMode::Final, 0, false); - depth_state = pipelineManager.GetModVolDepthStencilStates(ModVolMode::Final, 0, false); - [encoder setRenderPipelineState:state]; - [encoder setDepthStencilState:depth_state]; - [encoder setStencilReferenceValue:0x81]; - [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangleStrip - indexCount:4 - indexType:MTLIndexTypeUInt32 - indexBuffer:curMainBuffer - indexBufferOffset:offsets.indexOffset]; - - [encoder popDebugGroup]; -} -void MetalRenderer::UploadMainBuffer(const VertexShaderUniforms &vertexUniforms, const FragmentShaderUniforms &fragmentUniforms) { - MetalBufferPacker packer; - - // Vertex - packer.add(pvrrc.verts.data(), pvrrc.verts.size() * sizeof(decltype(*pvrrc.verts.data()))); - // Modifier Volumes - offsets.modVolOffset = packer.add(pvrrc.modtrig.data(), pvrrc.modtrig.size() * sizeof(decltype(*pvrrc.modtrig.data()))); - // Index - offsets.indexOffset = packer.add(pvrrc.idx.data(), pvrrc.idx.size() * sizeof(decltype(*pvrrc.idx.data()))); - // Uniform buffers - offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); - offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); - - std::vector n2uniforms; - if (settings.platform.isNaomi2()) + bool Render() override { - // packNaomi2Uniforms(packer, offsets, n2uniforms, false); - // offsets.lightsOffset = packNaomi2Lights(packer); - } - - 
MetalBufferData *buffer = GetMainBuffer(packer.size()); - packer.upload(*buffer); - curMainBuffer = buffer->buffer; -} - -bool MetalRenderer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { - matrices.CalcMatrices(&pvrrc); - u32 origWidth = pvrrc.getFramebufferWidth(); - u32 origHeight = pvrrc.getFramebufferHeight(); - u32 upscaledWidth = origWidth; - u32 upscaledHeight = origHeight; - u32 widthPow2; - u32 heightPow2; - getRenderToTextureDimensions(upscaledWidth, upscaledHeight, widthPow2, heightPow2); - - FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); - dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; - if (dithering) { - switch (pvrrc.fb_W_CTRL.fb_packmode) - { - case 0: // 0555 KRGB 16 bit - case 3: // 1555 ARGB 16 bit - fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f; - break; - case 1: // 565 RGB 16 bit - fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f; - fragUniforms.ditherDivisor[1] = 4.f; - break; - case 2: // 4444 ARGB 16 bit - fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f; - break; - default: - break; + id commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; + + MetalDrawer *drawer; + if (pvrrc.isRTT) + drawer = &textureDrawer; + else { + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + drawer = &screenDrawer; } - fragUniforms.ditherDivisor[3] = 1.f; - } - currentScissor = MTLScissorRect {}; + drawer->Draw(fogTexture.get(), paletteTexture.get(), commandBuffer); + // TODO: ENABLE LATER WHEN WE CAN + //if (config::EmulateFramebuffer || pvrrc.isRTT) + // delay ending the render pass in case of multi render + drawer->EndRenderPass(); - if (!frameBuffer || widthPow2 > frameBuffer.width || heightPow2 > frameBuffer.height) { - if (frameBuffer) { - WaitIdle(); - [frameBuffer setPurgeableState:MTLPurgeableStateEmpty]; 
- frameBuffer = nil; - } + [commandBuffer commit]; - MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; - [desc setPixelFormat:MTLPixelFormatBGRA8Unorm]; - [desc setWidth:widthPow2]; - [desc setHeight:heightPow2]; - [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; - - frameBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:desc]; - } - - if (!depthBuffer || widthPow2 > depthBuffer.width || heightPow2 > depthBuffer.height) { - if (depthBuffer) { - WaitIdle(); - [depthBuffer setPurgeableState:MTLPurgeableStateEmpty]; - depthBuffer = nil; - } - - MTLTextureDescriptor *depthDesc = [[MTLTextureDescriptor alloc] init]; - [depthDesc setPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; - [depthDesc setWidth:widthPow2]; - [depthDesc setHeight:heightPow2]; - [depthDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; - - depthBuffer = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDesc]; + return !pvrrc.isRTT; } - auto drawable = [MetalContext::Instance()->GetLayer() nextDrawable]; - - commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; - MTLRenderPassDescriptor *descriptor = [[MTLRenderPassDescriptor alloc] init]; - auto color = [descriptor colorAttachments][0]; - [color setTexture:frameBuffer]; - [color setLoadAction:MTLLoadActionClear]; - [color setStoreAction:MTLStoreActionStore]; - - MTLRenderPassDepthAttachmentDescriptor *depthAttachmentDescriptor = [[MTLRenderPassDepthAttachmentDescriptor alloc] init]; - [depthAttachmentDescriptor setTexture:depthBuffer]; - [depthAttachmentDescriptor setLoadAction:MTLLoadActionClear]; - [depthAttachmentDescriptor setStoreAction:MTLStoreActionDontCare]; - - MTLRenderPassStencilAttachmentDescriptor *stencilAttachmentDescriptor = [[MTLRenderPassStencilAttachmentDescriptor alloc] init]; - [stencilAttachmentDescriptor setTexture:depthBuffer]; - [stencilAttachmentDescriptor setLoadAction:MTLLoadActionClear]; - 
[stencilAttachmentDescriptor setStoreAction:MTLStoreActionDontCare]; - - [descriptor setDepthAttachment:depthAttachmentDescriptor]; - [descriptor setStencilAttachment:stencilAttachmentDescriptor]; - - @autoreleasepool { - id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:descriptor]; - - [renderEncoder setFragmentTexture:fogTexture->GetTexture() atIndex:2]; - [renderEncoder setFragmentTexture:paletteTexture->GetTexture() atIndex:3]; - - // Fog sampler - TSP fogTsp = {}; - fogTsp.FilterMode = 1; - fogTsp.ClampU = 1; - fogTsp.ClampV = 1; - [renderEncoder setFragmentSamplerState:samplers.GetSampler(fogTsp) atIndex:2]; - - // Palette sampler - TSP palTsp = {}; - palTsp.FilterMode = 0; - palTsp.ClampU = 1; - palTsp.ClampV = 1; - [renderEncoder setFragmentSamplerState:samplers.GetSampler(palTsp) atIndex:3]; - - setFirstProvokingVertex(pvrrc); - - // Upload vertex and index buffers - VertexShaderUniforms vtxUniforms {}; - vtxUniforms.ndcMat = matrices.GetNormalMatrix(); - - UploadMainBuffer(vtxUniforms, fragUniforms); - - [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; - [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; - [renderEncoder setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; - - RenderPass previous_pass {}; - for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { - const RenderPass& current_pass = pvrrc.render_passes[render_pass]; - - DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, - current_pass.op_count - previous_pass.op_count, - current_pass.pt_count - previous_pass.pt_count, - current_pass.tr_count - previous_pass.tr_count, - current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); - DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); - DrawList(renderEncoder, ListType_Punch_Through, 
false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); - DrawModVols(renderEncoder, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - if (current_pass.autosort) { - if (!config::PerStripSorting) - DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); - else - DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); - } else { - // TODO: This breaking? - // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); - } - previous_pass = current_pass; - } - - [renderEncoder endEncoding]; + bool Present() override + { + if (clearLastFrame) + return false; + if (config::EmulateFramebuffer || framebufferRendered) + return presentFramebuffer(); + else + return screenDrawer.PresentFrame(); } - // Blit to framebuffer - descriptor = [[MTLRenderPassDescriptor alloc] init]; - color = [descriptor colorAttachments][0]; - [color setTexture:[drawable texture]]; - [color setLoadAction:MTLLoadActionClear]; - [color setStoreAction:MTLStoreActionStore]; - - @autoreleasepool { - id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:descriptor]; - - [renderEncoder setRenderPipelineState:pipelineManager.GetBlitPassPipeline()]; - [renderEncoder setFragmentTexture:frameBuffer atIndex:0]; - [renderEncoder drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; - [renderEncoder endEncoding]; +protected: + void resize(int w, int h) override + { + if ((u32)w == viewport.width && (u32)h == viewport.height) + return; + BaseMetalRenderer::resize(w, h); + WaitIdle(); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); } - [commandBuffer presentDrawable:drawable]; - [commandBuffer commit]; - // TODO: Properly handle wait/vsync/buffering! 
- WaitIdle(); - - DEBUG_LOG(RENDERER, "Render command buffer released"); - return !pvrrc.isRTT; -} +private: + MetalSamplers samplerManager; + MetalScreenDrawer screenDrawer; + MetalTextureDrawer textureDrawer; + bool emulateFramebuffer = false; +}; Renderer* rend_Metal() { return new MetalRenderer(); } + +void MetalReInitOSD() +{ + if (renderer != nullptr) { + BaseMetalRenderer *mtlrenderer = dynamic_cast<BaseMetalRenderer *>(renderer); + if (mtlrenderer != nullptr) + mtlrenderer->ReInitOSD(); + } +} \ No newline at end of file diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h index 132ca70507..690138ad44 100644 --- a/core/rend/metal/metal_shaders.h +++ b/core/rend/metal/metal_shaders.h @@ -18,14 +18,14 @@ */ #pragma once + #include #include "types.h" #include #include - -struct VertexShaderParams +struct MetalVertexShaderParams { bool gouraud; bool naomi2; @@ -34,7 +34,7 @@ struct VertexShaderParams u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)divPosZ << 2); } }; -struct FragmentShaderParams +struct MetalFragmentShaderParams { bool alphaTest; bool insideClipTest; @@ -62,7 +62,7 @@ struct FragmentShaderParams } }; -struct ModVolShaderParams +struct MetalModVolShaderParams { bool naomi2; bool divPosZ; @@ -71,13 +71,13 @@ struct ModVolShaderParams }; // std140 alignment required -struct VertexShaderUniforms +struct MetalVertexShaderUniforms { glm::mat4 ndcMat; }; // std140 alignment required -struct FragmentShaderUniforms +struct MetalFragmentShaderUniforms { float colorClampMin[4]; float colorClampMax[4]; @@ -96,7 +96,7 @@ class MetalShaders id GetBlitVertexShader() { return blitVertexShader; } id GetBlitFragmentShader() { return blitFragmentShader; } - id GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); } + id GetModVolVertexShader(const MetalModVolShaderParams& params) { return getShader(modVolVertexShaders, params); } id GetModVolFragmentShader(bool divPosZ) { auto 
modVolFragmentShader = modVolFragmentShaders.find(divPosZ); if (modVolFragmentShader != modVolFragmentShaders.end()) @@ -106,8 +106,38 @@ class MetalShaders return modVolFragmentShaders[divPosZ]; } - id GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } - id GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } + id GetQuadVertexShader(bool rotate) { + if (rotate) + { + if (quadRotateVertexShader == nil) + quadRotateVertexShader = compileQuadVertexShader(true); + return quadRotateVertexShader; + } + else + { + if (quadVertexShader == nil) + quadVertexShader = compileQuadVertexShader(false); + return quadVertexShader; + } + } + + id GetQuadFragmentShader(bool ignoreTexAlpha) { + if (ignoreTexAlpha) + { + if (quadNoAlphaFragmentShader == nil) + quadNoAlphaFragmentShader = compileQuadFragmentShader(true); + return quadNoAlphaFragmentShader; + } + else + { + if (quadFragmentShader == nil) + quadFragmentShader = compileQuadFragmentShader(false); + return quadFragmentShader; + } + } + + id GetVertexShader(const MetalVertexShaderParams& params) { return getShader(vertexShaders, params); } + id GetFragmentShader(const MetalFragmentShaderParams& params) { return getShader(fragmentShaders, params); } void term() { @@ -120,9 +150,11 @@ class MetalShaders id modVolShaderLibrary; id vertexShaderLibrary; id fragmentShaderLibrary; + id quadShaderLibrary; MTLFunctionConstantValues* vertexShaderConstants; MTLFunctionConstantValues* fragmentShaderConstants; MTLFunctionConstantValues* modVolShaderConstants; + MTLFunctionConstantValues* quadShaderConstants; template id getShader(std::map> &map, T params) @@ -134,10 +166,12 @@ class MetalShaders map[h] = compileShader(params); return map[h]; } - id compileShader(const VertexShaderParams& params); - id compileShader(const FragmentShaderParams& params); - id compileShader(const ModVolShaderParams& params); + id compileShader(const 
MetalVertexShaderParams& params); + id compileShader(const MetalFragmentShaderParams& params); + id compileShader(const MetalModVolShaderParams& params); id compileShader(bool divPosZ); + id compileQuadVertexShader(bool rotate); + id compileQuadFragmentShader(bool ignoreTexAlpha); id blitVertexShader; id blitFragmentShader; @@ -147,4 +181,9 @@ class MetalShaders std::map> vertexShaders; std::map> fragmentShaders; + + id quadVertexShader; + id quadRotateVertexShader; + id quadFragmentShader; + id quadNoAlphaFragmentShader; }; diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 28f4b11ee5..5263db5913 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -399,7 +399,6 @@ fragment float4 fs_main(VertexOut in [[stage_in]], static const char ModVolShaderSource[] = R"( #include -#include using namespace metal; @@ -469,6 +468,61 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], } )"; +static const char QuadVertexShaderSource[] = R"( +#include + +using namespace metal; + +constant bool rotate_quad [[function_constant(0)]]; +constant bool ignore_tex_alpha [[function_constant(1)]]; + +struct VertexIn +{ + float3 pos [[attribute(0)]]; + float2 uv [[attribute(1)]]; +}; + +struct VertexOut +{ + float4 position [[position]]; + float2 uv; +}; + +struct PushBlock +{ + float4 color; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]]) { + VertexOut out = {}; + + if (!rotate_quad) { + out.position = float4(in.pos, 1.0); + } else { + out.position = float4(-in.pos.y, in.pos.x, in.pos.z, 1.0); + } + + out.uv = in.uv; + return out; +} + +fragment float4 fs_main(VertexOut in [[stage_in]], + constant PushBlock& push_constants [[buffer(0)]], + texture2d tex [[texture(0)]], + sampler samp [[sampler(0)]]) { + float4 color; + + if (ignore_tex_alpha) { + color.rgb = push_constants.color.rgb * tex.sample(samp, in.uv).rgb; + color.a = push_constants.color.a; + } else { + color = push_constants.color * 
tex.sample(samp, in.uv); + } + + return color; +} +)"; + // TODO: N2 Shaders MetalShaders::MetalShaders() { @@ -514,9 +568,23 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], modVolShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:ModVolShaderSource] options:nil error:&error]; modVolShaderConstants = [[MTLFunctionConstantValues alloc] init]; + + if (!modVolShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + quadShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:QuadVertexShaderSource] options:nil error:&error]; + quadShaderConstants = [[MTLFunctionConstantValues alloc] init]; + + if (!quadShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + } -id MetalShaders::compileShader(const VertexShaderParams &params) { +id MetalShaders::compileShader(const MetalVertexShaderParams &params) { [vertexShaderConstants setConstantValue:&params.gouraud type:MTLDataTypeBool atIndex:0]; [vertexShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:1]; @@ -532,7 +600,7 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], return function; } -id MetalShaders::compileShader(const FragmentShaderParams &params) { +id MetalShaders::compileShader(const MetalFragmentShaderParams &params) { [fragmentShaderConstants setConstantValue:&params.alphaTest type:MTLDataTypeBool atIndex:0]; [fragmentShaderConstants setConstantValue:&params.insideClipTest type:MTLDataTypeBool atIndex:1]; [fragmentShaderConstants setConstantValue:&params.useAlpha type:MTLDataTypeBool atIndex:2]; @@ -561,7 +629,7 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], return function; } -id MetalShaders::compileShader(const ModVolShaderParams &params) { +id MetalShaders::compileShader(const MetalModVolShaderParams &params) { [modVolShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:0]; NSError* error = nil; @@ -592,3 +660,34 @@ 
fragment FragmentOut fs_main(VertexOut in [[stage_in]], return function; } +id MetalShaders::compileQuadVertexShader(bool rotate) +{ + [quadShaderConstants setConstantValue:&rotate type:MTLDataTypeBool atIndex:0]; + + NSError* error = nil; + + id function = [quadShaderLibrary newFunctionWithName:@"vs_main" constantValues:quadShaderConstants error:&error]; + if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + +id MetalShaders::compileQuadFragmentShader(bool ignoreTexAlpha) +{ + [quadShaderConstants setConstantValue:&ignoreTexAlpha type:MTLDataTypeBool atIndex:1]; + + NSError* error = nil; + + id function = [quadShaderLibrary newFunctionWithName:@"fs_main" constantValues:quadShaderConstants error:&error]; + if (!function) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + return function; +} + + diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index 38e19b8416..7b1b0a530a 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -32,6 +32,11 @@ class MetalTexture final : public BaseTextureCacheData id GetTexture() const { return texture; } void UploadToGPU(int width, int height, const u8 *data, bool mipmapped, bool mipmapsIncluded = false) override; void SetCommandBuffer(id commandBuffer) { this->commandBuffer = commandBuffer; } + void SetTexture(id texture, u32 width, u32 height) { + this->texture = texture; + this->width = width; + this->height = height; + } bool Delete() override; private: @@ -43,8 +48,8 @@ class MetalTexture final : public BaseTextureCacheData u32 width = 0; u32 height = 0; u32 mipmapLevels = 1; - id commandBuffer; - id texture; + id commandBuffer = nil; + id texture = nil; }; class MetalSamplers diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index c40fc989be..91e73deffe 100644 --- 
a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -316,6 +316,7 @@ class VulkanRenderer final : public BaseVulkanRenderer protected: void resize(int w, int h) override { + ERROR_LOG(RENDERER, "VulkanRenderer::resize %d %d", w, h); if ((u32)w == viewport.width && (u32)h == viewport.height) return; BaseVulkanRenderer::resize(w, h); diff --git a/core/sdl/sdl.cpp b/core/sdl/sdl.cpp index 37334750e9..11df4d3ef4 100644 --- a/core/sdl/sdl.cpp +++ b/core/sdl/sdl.cpp @@ -705,7 +705,7 @@ bool sdl_recreate_window(u32 flags) } #if !defined(GLES) - flags |= SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI; + flags |= SDL_WINDOW_RESIZABLE; if (window_fullscreen) flags |= SDL_WINDOW_FULLSCREEN_DESKTOP; else if (window_maximized) From 7641b437ba70e9396f34d9c58d955e419673c616 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sun, 1 Jun 2025 22:37:30 -0400 Subject: [PATCH 33/48] GetLastFrame Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.mm | 104 +++++++++++++++++++++++++++++++ core/rend/metal/metal_renderer.h | 3 +- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 13e7d0abce..8c65261d34 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -337,5 +337,109 @@ width = w; } + MTLTextureDescriptor *renderTargetDesc = [[MTLTextureDescriptor alloc] init]; + renderTargetDesc.width = width; + renderTargetDesc.height = height; + renderTargetDesc.pixelFormat = MTLPixelFormatRGBA8Unorm; + renderTargetDesc.usage = MTLTextureUsageRenderTarget; + renderTargetDesc.storageMode = MTLStorageModePrivate; + + id renderTarget = [device newTextureWithDescriptor:renderTargetDesc]; + [renderTarget setLabel:@"Screenshot Render Target"]; + + NSUInteger bytesPerPixel = 4; + NSUInteger bytesPerRow = width * bytesPerPixel; + NSUInteger bufferSize = bytesPerRow * height; + + id readbackBuffer = [device newBufferWithLength:bufferSize + 
options:MTLResourceStorageModeShared]; + [readbackBuffer setLabel:@"Screenshot Readback Buffer"]; + + id commandBuffer = [queue commandBuffer]; + [commandBuffer setLabel:@"GetLastFrame"]; + + MTLRenderPassDescriptor *renderPassDesc = [[MTLRenderPassDescriptor alloc] init]; + renderPassDesc.colorAttachments[0].texture = renderTarget; + renderPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear; + renderPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore; + renderPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 1); + + id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDesc]; + [renderEncoder setLabel:@"GetLastFrame Render"]; + + MTLViewport viewport = { + 0.0, 0.0, + (double)width, (double)height, + 0.0, 1.0 + }; + [renderEncoder setViewport:viewport]; + + MTLScissorRect scissor = { + 0, 0, + (NSUInteger)width, (NSUInteger)height + }; + [renderEncoder setScissorRect:scissor]; + + MetalQuadVertex vtx[4] = { + { -1.f, -1.f, 0.f, 0.f, 1.f }, + { 1.f, -1.f, 0.f, 1.f, 1.f }, + { -1.f, 1.f, 0.f, 0.f, 0.f }, + { 1.f, 1.f, 0.f, 1.f, 0.f }, + }; + + if (config::Rotate90) { + quadRotatePipeline->BindPipeline(renderEncoder); + quadRotateDrawer->Draw(renderEncoder, lastFrameTexture, vtx, false); + } else { + quadPipeline->BindPipeline(renderEncoder); + quadDrawer->Draw(renderEncoder, lastFrameTexture, vtx, false); + } + + [renderEncoder endEncoding]; + + // Copy from render target to buffer + id blitEncoder = [commandBuffer blitCommandEncoder]; + [blitEncoder setLabel:@"GetLastFrame Blit"]; + + MTLOrigin sourceOrigin = MTLOriginMake(0, 0, 0); + MTLSize sourceSize = MTLSizeMake(width, height, 1); + + [blitEncoder copyFromTexture:renderTarget + sourceSlice:0 + sourceLevel:0 + sourceOrigin:sourceOrigin + sourceSize:sourceSize + toBuffer:readbackBuffer + destinationOffset:0 + destinationBytesPerRow:bytesPerRow + destinationBytesPerImage:bufferSize]; + + [blitEncoder endEncoding]; + + [commandBuffer commit]; + 
[commandBuffer waitUntilCompleted]; + + if (commandBuffer.status != MTLCommandBufferStatusCompleted) { + NSError *error = commandBuffer.error; + WARN_LOG(RENDERER, "MetalContext::GetLastFrame: Command buffer failed: %s", + error ? error.localizedDescription.UTF8String : "Unknown error"); + return false; + } + + // Read back the data + const u8 *img = (const u8 *)[readbackBuffer contents]; + data.clear(); + + data.reserve(width * height * 3); + // RGBA -> RGB conversion + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + data.push_back(*img++); // R + data.push_back(*img++); // G + data.push_back(*img++); // B + img++; // Skip A + } + } + return true; } diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index dfc520f45e..8f2bfa9ac9 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -46,8 +46,7 @@ class BaseMetalRenderer : public Renderer } bool GetLastFrame(std::vector& data, int& width, int& height) override { - // return MetalContext::Instance()->GetLastFrame(data, width, height); - return false; + return MetalContext::Instance()->GetLastFrame(data, width, height); } protected: From 684278559ef8f58c1be8e5d55d93c60d62b5dcf4 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sun, 1 Jun 2025 23:07:09 -0400 Subject: [PATCH 34/48] Uncomment unsorted translucent draw Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_drawer.mm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index ed4742831b..6850e7eee8 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -399,8 +399,7 @@ else DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); } else { - // TODO: This breaking? 
- // DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); } previous_pass = current_pass; } From d7e6b33b4e62aeec28dc3af38bdeaac0d6960de1 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sun, 1 Jun 2025 23:25:00 -0400 Subject: [PATCH 35/48] =?UTF-8?q?Don=E2=80=99t=20advertise=20as=20per=20pix?= =?UTF-8?q?el=20rn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.h | 2 +- core/ui/gui.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 2acecc10c0..202c8abbb8 100644 --- a/core/rend/metal/metal_context.h +++ b/core/rend/metal/metal_context.h @@ -70,7 +70,7 @@ class MetalContext : public GraphicsContext } bool hasPerPixel() override { - return true; + return false; } bool recreateSwapChainIfNeeded(); diff --git a/core/ui/gui.cpp b/core/ui/gui.cpp index 98bc5dac94..0024a51e32 100644 --- a/core/ui/gui.cpp +++ b/core/ui/gui.cpp @@ -2200,7 +2200,7 @@ static void gui_settings_video() break; case RenderType::Metal: renderApi = 4; - perPixel = true; + perPixel = false; } constexpr int apiCount = 0 From 17fdbab60a35c5fa6af5e1a0136eb8c98154acfc Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sun, 1 Jun 2025 23:39:49 -0400 Subject: [PATCH 36/48] Last Metal driver fixes Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.h | 10 ++------- core/rend/metal/metal_context.mm | 11 +++++++++- core/rend/metal/metal_driver.h | 35 +++++++++++++++++--------------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h index 202c8abbb8..9024a72e87 100644 --- a/core/rend/metal/metal_context.h +++ 
b/core/rend/metal/metal_context.h @@ -48,16 +48,10 @@ class MetalContext : public GraphicsContext id GetDevice() const { return device; } CAMetalLayer* GetLayer() const { return layer; } id GetQueue() const { return queue; } - MTLRenderPassDescriptor* GetDescriptor() const { - if (rendering) { - return renderPassDescriptor; - } - - // Hack to make sure imgui takes control of presenting pause menu - return nullptr; - } + MTLRenderPassDescriptor* GetDescriptor() const { return renderPassDescriptor; } id GetEncoder() const { return commandEncoder; } id GetCommandBuffer() const { return commandBuffers[currentImage]; } + bool IsRendering() const { return rendering; } std::string getDriverName() override; diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 8c65261d34..4a33bc279a 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm @@ -195,9 +195,10 @@ swapOnVSync = (!settings.input.fastForwardMode && config::VSync); resized = true; } - if (resized) + if (resized) { CreateSwapChain(); lastFrameTexture = nil; + } } void MetalContext::DrawFrame(id texture, MTLViewport viewport, float aspectRatio) { @@ -276,7 +277,15 @@ void MetalContext::term() { GraphicsContext::instance = nullptr; + lastFrameTexture = nil; imguiDriver.reset(); + quadDrawer.reset(); + quadPipeline.reset(); + quadPipelineWithAlpha.reset(); + quadRotateDrawer.reset(); + quadRotatePipeline.reset(); + shaderManager.reset(); + commandBuffers.clear(); } bool MetalContext::HasSurfaceDimensionChanged() const diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h index 091ce5b2a9..65837653bd 100644 --- a/core/rend/metal/metal_driver.h +++ b/core/rend/metal/metal_driver.h @@ -34,6 +34,7 @@ class MetalDriver final : public ImGuiDriver { void reset() override { ImGuiDriver::reset(); + textures.clear(); ImGui_ImplMetal_Shutdown(); } @@ -62,26 +63,28 @@ class MetalDriver final : public ImGuiDriver { void renderDrawData(ImDrawData 
*drawData, bool gui_open) override { MetalContext *context = MetalContext::Instance(); - if (context->GetCommandBuffer() != nil && context->GetCommandBuffer() != nil) { - ImGui_ImplMetal_RenderDrawData(drawData, context->GetCommandBuffer(), context->GetEncoder()); - } else { - id buffer = [context->GetQueue() commandBuffer]; - id commandEncoder = [buffer renderCommandEncoderWithDescriptor:descriptor]; - ImGui_ImplMetal_RenderDrawData(drawData, buffer, commandEncoder); - - [commandEncoder endEncoding]; - [buffer presentDrawable:drawable]; - [buffer commit]; + if (!context->IsValid()) + return; + + bool rendering = context->IsRendering(); + if (!rendering) + context->NewFrame(); + if (!rendering || newFrameStarted) + { + context->BeginRenderPass(); + if (renderer->RenderLastFrame()) + context->PresentLastFrame(); } - if (gui_open) - frameRendered = true; + ImGui_ImplMetal_RenderDrawData(drawData, context->GetCommandBuffer(), context->GetEncoder()); + + if (!rendering || newFrameStarted) + context->EndFrame(); + newFrameStarted = false; } void present() override { - if (frameRendered) - //MetalContext::Instance()->GetDevice().pre - frameRendered = false; + MetalContext::Instance()->Present(); } ImTextureID getTexture(const std::string &name) override { @@ -118,8 +121,8 @@ class MetalDriver final : public ImGuiDriver { std::unique_ptr texture; }; - bool frameRendered = false; MTLRenderPassDescriptor* descriptor; id drawable; std::unordered_map textures; + bool newFrameStarted = false; }; From f5e0d3a52f23eeed21479bc316676e43dd4d1733 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Mon, 2 Jun 2025 13:49:44 -0400 Subject: [PATCH 37/48] isolate mac only api Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_context.mm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm index 4a33bc279a..66f5f3f5dd 100644 --- a/core/rend/metal/metal_context.mm +++ b/core/rend/metal/metal_context.mm 
@@ -35,9 +35,11 @@ [layer setPixelFormat:MTLPixelFormatBGRA8Unorm]; [layer setFramebufferOnly:TRUE]; - [layer setDisplaySyncEnabled:TRUE]; [layer setColorspace:CGColorSpaceCreateWithName(kCGColorSpaceSRGB)]; [layer setMaximumDrawableCount:3]; +#if TARGET_OS_MAC || TARGET_OS_MACCATALYST + [layer setDisplaySyncEnabled:TRUE]; +#endif auto size = [layer drawableSize]; width = size.width; From 60f1b2c7d045b191b2a767dfe4a804ccaccd5c43 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Mon, 2 Jun 2025 19:27:24 -0400 Subject: [PATCH 38/48] Fix mipmap upload Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_texture.mm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index a5a1a49fea..76c81dd801 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -105,7 +105,7 @@ u32 dataOffset = 0; for (u32 i = 0; i < mipmapLevels; i++) { - const u32 size = (1 << (2 * i)) * bpp; + const u32 size = (1 << (2 * i)) * 2; u32 mipLevel = mipmapLevels - i - 1; u32 mipWidth = std::max(texture.width >> mipLevel, 1ul); @@ -114,10 +114,11 @@ MTLRegion region = MTLRegionMake2D(0, 0, mipWidth, mipHeight); [texture replaceRegion:region mipmapLevel:mipLevel - withBytes:src + dataOffset + withBytes:src bytesPerRow:mipWidth * bpp]; dataOffset += ((size + 3) >> 2) << 2; + src += size; } } else From 6653b2dec8881b25cb55af6b472382709f5051c6 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sat, 14 Jun 2025 17:55:32 -0400 Subject: [PATCH 39/48] Initial Naomi 2 Support Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_drawer.h | 92 ++++++++ core/rend/metal/metal_drawer.mm | 27 ++- core/rend/metal/metal_pipeline.mm | 6 +- core/rend/metal/metal_shaders.h | 16 ++ core/rend/metal/metal_shaders.mm | 375 +++++++++++++++++++++++++++++- 5 files changed, 502 insertions(+), 14 deletions(-) diff --git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h index 
8dac657946..d687a09bac 100644 --- a/core/rend/metal/metal_drawer.h +++ b/core/rend/metal/metal_drawer.h @@ -27,6 +27,7 @@ #include "rend/transform_matrix.h" #include "rend/sorter.h" #include "hw/pvr/pvr_mem.h" +#include class MetalBaseDrawer { @@ -92,6 +93,97 @@ class MetalBaseDrawer return fragUniforms; } + template + void packNaomi2Uniforms(MetalBufferPacker& packer, Offsets& offsets, std::vector& n2uniforms, bool trModVolIncluded) + { + size_t n2UniformSize = sizeof(MetalN2VertexShaderUniforms) + MetalBufferPacker::align(sizeof(MetalN2VertexShaderUniforms), 16); + int items = pvrrc.global_param_op.size() + pvrrc.global_param_pt.size() + pvrrc.global_param_tr.size() + pvrrc.global_param_mvo.size(); + if (trModVolIncluded) + items += pvrrc.global_param_mvo_tr.size(); + n2uniforms.resize(items * n2UniformSize); + size_t bufIdx = 0; + auto addUniform = [&](const PolyParam& pp, int polyNumber) { + if (pp.isNaomi2()) + { + MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[pp.mvMatrix].mat, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.normalMat), pvrrc.matrices[pp.normalMatrix].mat, sizeof(uni.normalMat)); + memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[pp.projMatrix].mat, sizeof(uni.projMat)); + uni.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap; + uni.polyNumber = polyNumber; + for (size_t i = 0; i < 2; i++) + { + uni.envMapping[i] = pp.envMapping[i]; + uni.glossCoef[i] = pp.glossCoef[i]; + uni.constantColor[i] = pp.constantColor[i]; + } + } + bufIdx += n2UniformSize; + }; + for (const PolyParam& pp : pvrrc.global_param_op) + addUniform(pp, 0); + size_t ptOffset = bufIdx; + for (const PolyParam& pp : pvrrc.global_param_pt) + addUniform(pp, 0); + size_t trOffset = bufIdx; + if (!pvrrc.global_param_tr.empty()) + { + u32 firstVertexIdx = pvrrc.idx[pvrrc.global_param_tr[0].first]; + for (const PolyParam& pp : pvrrc.global_param_tr) + addUniform(pp, ((&pp 
- &pvrrc.global_param_tr[0]) << 17) - firstVertexIdx); + } + size_t mvOffset = bufIdx; + for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo) + { + if (mvp.isNaomi2()) + { + MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[mvp.mvMatrix].mat, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[mvp.projMatrix].mat, sizeof(uni.projMat)); + } + bufIdx += n2UniformSize; + } + size_t trMvOffset = bufIdx; + if (trModVolIncluded) + for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo_tr) + { + if (mvp.isNaomi2()) + { + MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[mvp.mvMatrix].mat, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[mvp.projMatrix].mat, sizeof(uni.projMat)); + } + bufIdx += n2UniformSize; + } + offsets.naomi2OpaqueOffset = packer.addUniform(n2uniforms.data(), bufIdx); + offsets.naomi2PunchThroughOffset = offsets.naomi2OpaqueOffset + ptOffset; + offsets.naomi2TranslucentOffset = offsets.naomi2OpaqueOffset + trOffset; + offsets.naomi2ModVolOffset = offsets.naomi2OpaqueOffset + mvOffset; + offsets.naomi2TrModVolOffset = offsets.naomi2OpaqueOffset + trMvOffset; + } + + u64 packNaomi2Lights(MetalBufferPacker& packer) + { + u64 offset = -1; + + size_t n2LightSize = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); + if (n2LightSize == sizeof(N2LightModel) && !pvrrc.lightModels.empty()) + { + offset = packer.addUniform(&pvrrc.lightModels[0], pvrrc.lightModels.size() * sizeof(decltype(pvrrc.lightModels[0]))); + } + else + { + for (const N2LightModel& model : pvrrc.lightModels) + { + u64 ioffset = packer.addUniform(&model, sizeof(N2LightModel)); + if (offset == (u64)-1) + offset = ioffset; + } + } + + return offset; + } + MTLScissorRect baseScissor {}; MTLScissorRect currentScissor {}; TransformMatrix 
matrices; diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index 6850e7eee8..1ecf87a839 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -152,13 +152,32 @@ if (poly.pcw.Texture || poly.isNaomi2()) { + u64 offset = 0; u32 index = 0; if (poly.isNaomi2()) { - + switch (listType) + { + case ListType_Opaque: + offset = offsets.naomi2OpaqueOffset; + index = &poly - &pvrrc.global_param_op[0]; + break; + case ListType_Punch_Through: + offset = offsets.naomi2PunchThroughOffset; + index = &poly - &pvrrc.global_param_pt[0]; + break; + case ListType_Translucent: + offset = offsets.naomi2TranslucentOffset; + index = &poly - &pvrrc.global_param_tr[0]; + break; + } } - // TODO: Bind Texture & Naomi2 Lights Buffers + size_t size = sizeof(MetalN2VertexShaderUniforms) + MetalBufferPacker::align(sizeof(MetalN2VertexShaderUniforms), 16); + [encoder setVertexBuffer:curMainBuffer offset:offset + index * size atIndex:1]; + + size = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); + [encoder setVertexBuffer:curMainBuffer offset:offsets.lightsOffset + poly.lightModel * size atIndex:2]; } MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? 
MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; @@ -314,8 +333,8 @@ std::vector n2uniforms; if (settings.platform.isNaomi2()) { - // packNaomi2Uniforms(packer, offsets, n2uniforms, false); - // offsets.lightsOffset = packNaomi2Lights(packer); + packNaomi2Uniforms(packer, offsets, n2uniforms, false); + offsets.lightsOffset = packNaomi2Lights(packer); } MetalBufferData *buffer = GetMainBuffer(packer.size()); diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index af399797dd..c3671bac8a 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -113,9 +113,9 @@ [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; [attachment setWriteMask:MTLColorWriteMaskNone]; - // TODO: Need functions here - // descriptor->setVertexFunction(); - // descriptor->setFragmentFunction(); + MetalModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + [descriptor setVertexFunction:shaderManager->GetModVolVertexShader(shaderParams)]; + [descriptor setFragmentFunction:shaderManager->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)];; NSError *error = nil; auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h index 690138ad44..663d2edd8f 100644 --- a/core/rend/metal/metal_shaders.h +++ b/core/rend/metal/metal_shaders.h @@ -88,6 +88,20 @@ struct MetalFragmentShaderUniforms float sp_FOG_DENSITY; }; +// std140 alignment required +struct MetalN2VertexShaderUniforms +{ + glm::mat4 mvMat; + glm::mat4 normalMat; + glm::mat4 projMat; + int envMapping[2]; + int bumpMapping; + int polyNumber; + + float glossCoef[2]; + int constantColor[2]; +}; + class MetalShaders { public: @@ -148,7 +162,9 @@ class MetalShaders private: id blitShaderLibrary; id modVolShaderLibrary; + id 
n2ModVolVertexShaderLibrary; id vertexShaderLibrary; + id n2VertexShaderLibrary; id fragmentShaderLibrary; id quadShaderLibrary; MTLFunctionConstantValues* vertexShaderConstants; diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 5263db5913..bf10d743f0 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -468,7 +468,7 @@ fragment FragmentOut fs_main(VertexOut in [[stage_in]], } )"; -static const char QuadVertexShaderSource[] = R"( +static const char QuadShaderSource[] = R"( #include using namespace metal; @@ -523,7 +523,342 @@ fragment float4 fs_main(VertexOut in [[stage_in]], } )"; -// TODO: N2 Shaders +static const char N2VertexShaderSource[] = R"( +#include + +using namespace metal; + +constant bool pp_gouraud [[function_constant(0)]]; + +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flag = !is_flat; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct N2VertexShaderUniforms +{ + float4x4 mv_mat; + float4x4 normal_mat; + float4x4 proj_mat; + int2 env_mapping; + int bump_mapping; + int poly_number; + + float2 gloss_coef; + int2 constant_color; +}; + +struct N2VertexIn +{ + float4 in_pos [[attribute(0)]]; + float4 in_base [[attribute(1)]]; + float4 in_offs [[attribute(2)]]; + float2 in_uv [[attribute(3)]]; + float3 in_normal [[attribute(4)]]; +}; + +struct VertexOut +{ + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flag)]]; + float4 vtx_offs [[function_constant(is_not_flag)]]; + float3 vtx_uv; + float4 position [[position]]; +}; + +#define PI 3.1415926 + +#define LMODE_SINGLE_SIDED 0 +#define LMODE_DOUBLE_SIDED 1 +#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2 +#define LMODE_SPECIAL_EFFECT 3 +#define LMODE_THIN_SURFACE 4 +#define LMODE_BUMP_MAP 5 + +#define ROUTING_SPEC_TO_OFFSET 1 +#define ROUTING_DIFF_TO_OFFSET 2 +#define ROUTING_ATTENUATION 1 // 
not handled +#define ROUTING_FOG 2 // not handled +#define ROUTING_ALPHA 4 +#define ROUTING_SUB 8 + +struct N2Light +{ + float4 color; + float4 direction; // For parallel/spot + float4 position; // For spot/point + + int parallel; + int routing; + int dmode; + int smode; + + int2 diffuse; + int2 specular; + + float attn_dist_a; + float attn_dist_b; + float attn_angle_a; // For spot + float attn_angle_b; + + int dist_attn_mode; // For spot/point + int _pad1; + int _pad2; + int _pad3; +}; + +struct N2Lights +{ + N2Light lights[16]; + float4 ambient_base[2]; + float4 ambient_offset[2]; + int2 ambient_material_base; + int2 ambient_material_offset; + int light_count; + int use_base_over; + int bump_id0; + int bump_id1; +}; + +float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) +{ + float4 vpos = float4(in_vpos.xy / in_vpos.w, 1.0 / in_vpos.w, 1.0); + vpos = ndc_mat * vpos; + if (pp_gouraud == 1) { + if (is_flat) { + out.flat_vtx_base *= vpos.z; + out.flat_vtx_offs *= vpos.z; + } else { + out.vtx_base *= vpos.z; + out.vtx_offs *= vpos.z; + } + } + out.vtx_uv = float3(out.vtx_uv.xy * vpos.z, vpos.z); + vpos.w = 1.0; + vpos.z = 0.0; + return vpos; +} + +void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Lights& n2_lights, + thread float4 *base_col, thread float4 *offset_col, int vol_idx, float3 position, float3 normal) +{ + if (n2_uniforms.constant_color[vol_idx] == 1) + return; + + float3 diffuse = float3(0.0); + float3 specular = float3(0.0); + float diffuse_alpha = 0.0; + float specular_alpha = 0.0; + float3 reflect_dir = reflect(normalize(position), normal); + const float BASE_FACTOR = 2.0; + + for (int i = 0; i < n2_lights.light_count; i++) + { + float3 light_dir; // direction to the light + float3 light_color = n2_lights.lights[i].color.rgb; + if (n2_lights.lights[i].parallel == 1) + { + light_dir = normalize(n2_lights.lights[i].direction.xyz); + } + else + { + light_dir = normalize(n2_lights.lights[i].position.xyz - 
position); + if (n2_lights.lights[i].attn_dist_a != 1.0 || n2_lights.lights[i].attn_dist_b != 0.0) + { + float distance = length(n2_lights.lights[i].position.xyz - position); + if (n2_lights.lights[i].dist_attn_mode == 0) + distance = 1.0 / distance; + light_color *= clamp(n2_lights.lights[i].attn_dist_b * distance + n2_lights.lights[i].attn_dist_a, 0.0, 1.0); + } + if (n2_lights.lights[i].attn_angle_a != 1.0 || n2_lights.lights[i].attn_angle_b != 0.0) + { + float3 spot_dir = n2_lights.lights[i].direction.xyz; + float cos_angle = 1.0 - max(0.0, dot(light_dir, spot_dir)); + light_color *= clamp(cos_angle * n2_lights.lights[i].attn_angle_b + n2_lights.lights[i].attn_angle_a, 0.0, 1.0); + } + } + if (n2_lights.lights[i].diffuse[vol_idx] == 1) + { + float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? -BASE_FACTOR : BASE_FACTOR; + if (n2_lights.lights[i].dmode == LMODE_SINGLE_SIDED) + factor *= max(dot(normal, light_dir), 0.0); + else if (n2_lights.lights[i].dmode == LMODE_DOUBLE_SIDED) + factor *= abs(dot(normal, light_dir)); + + if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) + diffuse_alpha += light_color.r * factor; + else + { + if ((n2_lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += light_color * factor * base_col->rgb; + else + specular += light_color * factor * base_col->rgb; + } + } + if (n2_lights.lights[i].specular[vol_idx] == 1) + { + float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; + if (n2_lights.lights[i].dmode == LMODE_SINGLE_SIDED) + factor *= clamp(pow(max(dot(light_dir, reflect_dir), 0.0), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + else if (n2_lights.lights[i].dmode == LMODE_DOUBLE_SIDED) + factor *= clamp(pow(abs(dot(light_dir, reflect_dir)), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + + if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) + specular_alpha += light_color.r * factor; + else + { + if ((n2_lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += light_color * factor * offset_col->rgb; + else + specular += light_color * factor * offset_col->rgb; + } + } + } + // ambient light + if (n2_lights.ambient_material_base[vol_idx] == 1) + diffuse += n2_lights.ambient_base[vol_idx].rgb * base_col->rgb; + else + diffuse += n2_lights.ambient_base[vol_idx].rgb; + if (n2_lights.ambient_material_offset[vol_idx] == 1) + diffuse += n2_lights.ambient_offset[vol_idx].rgb * offset_col->rgb; + else + diffuse += n2_lights.ambient_offset[vol_idx].rgb; + base_col->rgb = diffuse; + offset_col->rgb = specular; + + base_col->a += diffuse_alpha; + offset_col->a += specular_alpha; + if (n2_lights.use_base_over == 1) + { + float4 overflow = max(base_col->rgba - float4(1.0), 0.0); + offset_col->rgba += overflow; + } + base_col->rgba = clamp(base_col->rgba, 0.0, 1.0); + offset_col->rgba = clamp(offset_col->rgba, 0.0, 1.0); +} + +float2 computeEnvMap(float2 uv, float3 position, float3 normal) +{ + // Spherical mapping + // float3 r = reflect(normalize(position), normal); + // float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0)); + // uv += r.xy / m + 0.5; + + // Cheap env mapping + uv += normal.xy / 2.0 + 0.5; + uv = clamp(uv, 0.0, 1.0); + + return uv; +} + +void computeBumpMap(constant N2Lights& n2_lights) +{ + // TODO + // if (n2_lights.bump_id0 == -1) + return; +} + +vertex VertexOut vs_main(N2VertexIn in [[stage_in]], + constant VertexShaderUniforms& uniforms [[buffer(0)]], + 
constant N2VertexShaderUniforms& n2_uniforms [[buffer(1)]], + constant N2Lights& n2_lights [[buffer(2)]]) +{ + float4 vpos = n2_uniforms.mv_mat * in.in_pos; + + VertexOut out = {}; + if (is_flat) { + out.flat_vtx_base = in.in_base; + out.flat_vtx_offs = in.in_offs; + } else { + out.vtx_base = in.in_base; + out.vtx_offs = in.in_offs; + } + + float3 vnorm = normalize(float3x3(n2_uniforms.normal_mat[0].xyz, + n2_uniforms.normal_mat[1].xyz, + n2_uniforms.normal_mat[2].xyz) * in.in_normal); + + // TODO bump mapping + if (n2_uniforms.bump_mapping == 0) { + computeColors(n2_uniforms, n2_lights, + &out.vtx_base, &out.vtx_offs, 0, vpos.xyz, vnorm); + } + + out.vtx_uv.xy = in.in_uv; + if (n2_uniforms.env_mapping[0] == 1) + out.vtx_uv.xy = computeEnvMap(out.vtx_uv.xy, vpos.xyz, vnorm); + + vpos = n2_uniforms.proj_mat * vpos; + vpos = w_divide(vpos, uniforms.ndc_mat, out); + + out.position = vpos; + return out; +} +)"; + +extern const char MTLN2ModVolVertexShaderSource[] = R"( +#include + +using namespace metal; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct N2VertexShaderUniforms +{ + float4x4 mv_mat; + float4x4 normal_mat; + float4x4 proj_mat; + int2 env_mapping; + int bump_mapping; + int poly_number; + + float2 gloss_coef; + int2 constant_color; +}; + +struct VertexIn +{ + float4 in_pos [[attribute(0)]]; +}; + +struct VertexOut +{ + float depth; + float4 position [[position]]; +}; + +float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) +{ + float4 vpos = float4(in_vpos.xy / in_vpos.w, 1.0 / in_vpos.w, 1.0); + vpos = ndc_mat * vpos; + out.depth = vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + return vpos; +} + +vertex VertexOut vs_main(VertexIn in [[stage_in]], + constant VertexShaderUniforms& uniforms [[buffer(0)]], + constant N2VertexShaderUniforms& n2_uniforms [[buffer(1)]]) +{ + VertexOut out = {}; + + float4 vpos = n2_uniforms.mv_mat * in.in_pos; + vpos.z = min(vpos.z, -0.01); + vpos = n2_uniforms.proj_mat * vpos; + vpos = 
w_divide(vpos, uniforms.ndc_mat, out); + out.position = vpos; + + return out; +} +)"; MetalShaders::MetalShaders() { auto device = MetalContext::Instance()->GetDevice(); @@ -545,6 +880,13 @@ fragment float4 fs_main(VertexOut in [[stage_in]], assert(false); } + n2VertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:N2VertexShaderSource] options:nil error:&error]; + + if (!n2VertexShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + blitShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:BlitShader] options:nil error:&error]; if (!blitShaderLibrary) { @@ -574,7 +916,14 @@ fragment float4 fs_main(VertexOut in [[stage_in]], assert(false); } - quadShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:QuadVertexShaderSource] options:nil error:&error]; + n2ModVolVertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:MTLN2ModVolVertexShaderSource] options:nil error:&error]; + + if (!n2ModVolVertexShaderLibrary) { + ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); + assert(false); + } + + quadShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:QuadShaderSource] options:nil error:&error]; quadShaderConstants = [[MTLFunctionConstantValues alloc] init]; if (!quadShaderLibrary) { @@ -586,11 +935,18 @@ fragment float4 fs_main(VertexOut in [[stage_in]], id MetalShaders::compileShader(const MetalVertexShaderParams ¶ms) { [vertexShaderConstants setConstantValue:¶ms.gouraud type:MTLDataTypeBool atIndex:0]; - [vertexShaderConstants setConstantValue:¶ms.divPosZ type:MTLDataTypeBool atIndex:1]; NSError* error = nil; - id function = [vertexShaderLibrary newFunctionWithName:@"vs_main" constantValues:vertexShaderConstants error:&error]; + id function; + + if (params.naomi2) { + function = [n2VertexShaderLibrary newFunctionWithName:@"vs_main" constantValues:vertexShaderConstants 
error:&error]; + } else { + [vertexShaderConstants setConstantValue:¶ms.divPosZ type:MTLDataTypeBool atIndex:1]; + + function = [vertexShaderLibrary newFunctionWithName:@"vs_main" constantValues:vertexShaderConstants error:&error]; + } if (!function) { ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); @@ -634,8 +990,13 @@ fragment float4 fs_main(VertexOut in [[stage_in]], NSError* error = nil; - // TODO: Naomi2 ModVol Frag Shader - id function = [modVolShaderLibrary newFunctionWithName:@"vs_main" constantValues:modVolShaderConstants error:&error]; + id function; + + if (params.naomi2) { + function = [n2ModVolVertexShaderLibrary newFunctionWithName:@"vs_main" constantValues:[[MTLFunctionConstantValues alloc] init] error:&error]; + } else { + function = [modVolShaderLibrary newFunctionWithName:@"vs_main" constantValues:modVolShaderConstants error:&error]; + } if (!function) { ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); From 84c1bafc9adac3c1812982eba35502b63b0a5f90 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sat, 14 Jun 2025 18:19:29 -0400 Subject: [PATCH 40/48] Fix some Naomi 2 typos Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index bf10d743f0..30d040b077 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -702,16 +702,16 @@ void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Ligh if (n2_lights.lights[i].specular[vol_idx] == 1) { float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; - if (n2_lights.lights[i].dmode == LMODE_SINGLE_SIDED) + if (n2_lights.lights[i].smode == LMODE_SINGLE_SIDED) factor *= clamp(pow(max(dot(light_dir, reflect_dir), 0.0), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); - else if (n2_lights.lights[i].dmode == LMODE_DOUBLE_SIDED) + else if (n2_lights.lights[i].smode == LMODE_DOUBLE_SIDED) factor *= clamp(pow(abs(dot(light_dir, reflect_dir)), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) specular_alpha += light_color.r * factor; else { - if ((n2_lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + if ((n2_lights.lights[i].routing & ROUTING_SPEC_TO_OFFSET) == 0) diffuse += light_color * factor * offset_col->rgb; else specular += light_color * factor * offset_col->rgb; @@ -724,9 +724,9 @@ void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Ligh else diffuse += n2_lights.ambient_base[vol_idx].rgb; if (n2_lights.ambient_material_offset[vol_idx] == 1) - diffuse += n2_lights.ambient_offset[vol_idx].rgb * offset_col->rgb; + specular += n2_lights.ambient_offset[vol_idx].rgb * offset_col->rgb; else - diffuse += n2_lights.ambient_offset[vol_idx].rgb; + specular += n2_lights.ambient_offset[vol_idx].rgb; base_col->rgb = diffuse; offset_col->rgb = specular; From 434107ce9cfbc178c68c51416b5e2da0b45eb693 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sun, 15 Jun 2025 22:12:09 -0400 Subject: [PATCH 41/48] Fix more shader typos Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 30d040b077..8b0ee631d5 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -31,7 +31,7 @@ constant bool div_pos_z [[function_constant(1)]]; constant bool is_flat = pp_gouraud == 0; -constant bool is_not_flag = !is_flat; +constant 
bool is_not_flat = !is_flat; struct VertexShaderUniforms { @@ -50,8 +50,8 @@ { float4 flat_vtx_base [[flat, function_constant(is_flat)]]; float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; - float4 vtx_base [[function_constant(is_not_flag)]]; - float4 vtx_offs [[function_constant(is_not_flag)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; float3 vtx_uv; float4 position [[position]]; }; @@ -122,7 +122,7 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms constant bool has_fog_table = pp_fog_ctrl != 2; constant bool has_palette = pp_palette != 0; constant bool is_flat = pp_gouraud == 0; -constant bool is_not_flag = !is_flat; +constant bool is_not_flat = !is_flat; struct FragmentShaderUniforms { @@ -146,8 +146,8 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms { float4 flat_vtx_base [[flat, function_constant(is_flat)]]; float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; - float4 vtx_base [[function_constant(is_not_flag)]]; - float4 vtx_offs [[function_constant(is_not_flag)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; float3 vtx_uv; float4 position [[position]]; }; @@ -531,7 +531,7 @@ fragment float4 fs_main(VertexOut in [[stage_in]], constant bool pp_gouraud [[function_constant(0)]]; constant bool is_flat = pp_gouraud == 0; -constant bool is_not_flag = !is_flat; +constant bool is_not_flat = !is_flat; struct VertexShaderUniforms { @@ -564,8 +564,8 @@ fragment float4 fs_main(VertexOut in [[stage_in]], { float4 flat_vtx_base [[flat, function_constant(is_flat)]]; float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; - float4 vtx_base [[function_constant(is_not_flag)]]; - float4 vtx_offs [[function_constant(is_not_flag)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; float3 vtx_uv; float4 position 
[[position]]; }; @@ -784,8 +784,13 @@ vertex VertexOut vs_main(N2VertexIn in [[stage_in]], // TODO bump mapping if (n2_uniforms.bump_mapping == 0) { - computeColors(n2_uniforms, n2_lights, - &out.vtx_base, &out.vtx_offs, 0, vpos.xyz, vnorm); + if (is_flat) { + computeColors(n2_uniforms, n2_lights, + &out.flat_vtx_base, &out.flat_vtx_offs, 0, vpos.xyz, vnorm); + } else { + computeColors(n2_uniforms, n2_lights, + &out.vtx_base, &out.vtx_offs, 0, vpos.xyz, vnorm); + } } out.vtx_uv.xy = in.in_uv; @@ -851,7 +856,7 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], VertexOut out = {}; float4 vpos = n2_uniforms.mv_mat * in.in_pos; - vpos.z = min(vpos.z, -0.01); + vpos.z = min(vpos.z, -0.001); vpos = n2_uniforms.proj_mat * vpos; vpos = w_divide(vpos, uniforms.ndc_mat, out); out.position = vpos; From c92bc54cf651d28c04147c19a56bc422f8b27c8d Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Mon, 16 Jun 2025 04:45:30 -0400 Subject: [PATCH 42/48] Fixes, but still broken and i dont know why Signed-off-by: Isaac Marovitz --- core/hw/pvr/elan.cpp | 2 +- core/rend/metal/metal_drawer.h | 12 +----- core/rend/metal/metal_drawer.mm | 6 ++- core/rend/metal/metal_shaders.mm | 71 ++++++++++++++++---------------- 4 files changed, 42 insertions(+), 49 deletions(-) diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 7b4c852027..01c19661a1 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -1162,7 +1162,7 @@ static void sendLights() return; state.lightModelUpdated = false; - N2LightModel model; + N2LightModel model {}; model.lightCount = 0; if (curLightModel == nullptr) { diff --git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h index d687a09bac..b9f463a77b 100644 --- a/core/rend/metal/metal_drawer.h +++ b/core/rend/metal/metal_drawer.h @@ -166,20 +166,10 @@ class MetalBaseDrawer { u64 offset = -1; - size_t n2LightSize = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); - if (n2LightSize == sizeof(N2LightModel) && 
!pvrrc.lightModels.empty()) + if (!pvrrc.lightModels.empty()) { offset = packer.addUniform(&pvrrc.lightModels[0], pvrrc.lightModels.size() * sizeof(decltype(pvrrc.lightModels[0]))); } - else - { - for (const N2LightModel& model : pvrrc.lightModels) - { - u64 ioffset = packer.addUniform(&model, sizeof(N2LightModel)); - if (offset == (u64)-1) - offset = ioffset; - } - } return offset; } diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index 1ecf87a839..e322bc579b 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -176,8 +176,10 @@ size_t size = sizeof(MetalN2VertexShaderUniforms) + MetalBufferPacker::align(sizeof(MetalN2VertexShaderUniforms), 16); [encoder setVertexBuffer:curMainBuffer offset:offset + index * size atIndex:1]; - size = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); - [encoder setVertexBuffer:curMainBuffer offset:offsets.lightsOffset + poly.lightModel * size atIndex:2]; + if (offsets.lightsOffset != -1) { + size = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); + [encoder setVertexBuffer:curMainBuffer offset:offsets.lightsOffset + poly.lightModel * size atIndex:2]; + } } MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? 
MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 8b0ee631d5..4903dd0eb6 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -170,7 +170,7 @@ float fog_mode2(float w, constant FragmentShaderUniforms& uniforms, } float exp = floor(log2(z)); - float m = z * 16.0 / pow(2.0, exp) - 16.0; + float m = z * 16.0 / powr(2.0, exp) - 16.0; float idx = floor(m) + exp * 16.0 + 0.5; float4 fog_coef = fog_table.sample(fog_table_sampler, float2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); return fog_coef.r; @@ -644,7 +644,7 @@ float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) } void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Lights& n2_lights, - thread float4 *base_col, thread float4 *offset_col, int vol_idx, float3 position, float3 normal) + thread float4& base_col, thread float4& offset_col, int vol_idx, float3 position, float3 normal) { if (n2_uniforms.constant_color[vol_idx] == 1) return; @@ -694,51 +694,51 @@ void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Ligh else { if ((n2_lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) - diffuse += light_color * factor * base_col->rgb; + diffuse += light_color * factor * base_col.rgb; else - specular += light_color * factor * base_col->rgb; + specular += light_color * factor * base_col.rgb; } } if (n2_lights.lights[i].specular[vol_idx] == 1) { float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; if (n2_lights.lights[i].smode == LMODE_SINGLE_SIDED) - factor *= clamp(pow(max(dot(light_dir, reflect_dir), 0.0), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + factor *= clamp(powr(max(dot(light_dir, reflect_dir), 0.0), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); else if (n2_lights.lights[i].smode == LMODE_DOUBLE_SIDED) - factor *= clamp(pow(abs(dot(light_dir, reflect_dir)), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + factor *= clamp(powr(abs(dot(light_dir, reflect_dir)), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) specular_alpha += light_color.r * factor; else { if ((n2_lights.lights[i].routing & ROUTING_SPEC_TO_OFFSET) == 0) - diffuse += light_color * factor * offset_col->rgb; + diffuse += light_color * factor * offset_col.rgb; else - specular += light_color * factor * offset_col->rgb; + specular += light_color * factor * offset_col.rgb; } } } // ambient light if (n2_lights.ambient_material_base[vol_idx] == 1) - diffuse += n2_lights.ambient_base[vol_idx].rgb * base_col->rgb; + diffuse += n2_lights.ambient_base[vol_idx].rgb * base_col.rgb; else diffuse += n2_lights.ambient_base[vol_idx].rgb; if (n2_lights.ambient_material_offset[vol_idx] == 1) - specular += n2_lights.ambient_offset[vol_idx].rgb * offset_col->rgb; + specular += n2_lights.ambient_offset[vol_idx].rgb * offset_col.rgb; else specular += n2_lights.ambient_offset[vol_idx].rgb; - base_col->rgb = diffuse; - offset_col->rgb = specular; + base_col.rgb = diffuse; + offset_col.rgb = specular; - base_col->a += diffuse_alpha; - offset_col->a += specular_alpha; + base_col.a += diffuse_alpha; + offset_col.a += specular_alpha; if (n2_lights.use_base_over == 1) { - float4 overflow = max(base_col->rgba - float4(1.0), 0.0); - offset_col->rgba += overflow; + float4 overflow = max(base_col - float4(1.0), 0.0); + offset_col += overflow; } - base_col->rgba = clamp(base_col->rgba, 0.0, 1.0); - offset_col->rgba = 
clamp(offset_col->rgba, 0.0, 1.0); + base_col = clamp(base_col, 0.0, 1.0); + offset_col = clamp(offset_col, 0.0, 1.0); } float2 computeEnvMap(float2 uv, float3 position, float3 normal) @@ -749,7 +749,7 @@ float2 computeEnvMap(float2 uv, float3 position, float3 normal) // uv += r.xy / m + 0.5; // Cheap env mapping - uv += normal.xy / 2.0 + 0.5; + uv += (normal.xy / 2.0) + 0.5; uv = clamp(uv, 0.0, 1.0); return uv; @@ -769,31 +769,32 @@ vertex VertexOut vs_main(N2VertexIn in [[stage_in]], { float4 vpos = n2_uniforms.mv_mat * in.in_pos; - VertexOut out = {}; - if (is_flat) { - out.flat_vtx_base = in.in_base; - out.flat_vtx_offs = in.in_offs; - } else { - out.vtx_base = in.in_base; - out.vtx_offs = in.in_offs; - } - float3 vnorm = normalize(float3x3(n2_uniforms.normal_mat[0].xyz, n2_uniforms.normal_mat[1].xyz, n2_uniforms.normal_mat[2].xyz) * in.in_normal); + float4 base = in.in_base; + float4 offset = in.in_offs; + // TODO bump mapping if (n2_uniforms.bump_mapping == 0) { - if (is_flat) { - computeColors(n2_uniforms, n2_lights, - &out.flat_vtx_base, &out.flat_vtx_offs, 0, vpos.xyz, vnorm); - } else { - computeColors(n2_uniforms, n2_lights, - &out.vtx_base, &out.vtx_offs, 0, vpos.xyz, vnorm); - } + computeColors(n2_uniforms, n2_lights, + base, offset, 0, vpos.xyz, vnorm); + base += offset; + } + + VertexOut out = {}; + + if (is_flat) { + out.flat_vtx_base = base; + out.flat_vtx_offs = offset; + } else { + out.vtx_base = base; + out.vtx_offs = offset; } out.vtx_uv.xy = in.in_uv; + if (n2_uniforms.env_mapping[0] == 1) out.vtx_uv.xy = computeEnvMap(out.vtx_uv.xy, vpos.xyz, vnorm); From c4a7007715f91395036bb66ea2abcd6a3ad7321b Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 17 Jun 2025 13:04:06 -0400 Subject: [PATCH 43/48] Fix normal attribute Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 
4903dd0eb6..1a9e5a855d 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -557,7 +557,7 @@ fragment float4 fs_main(VertexOut in [[stage_in]], float4 in_base [[attribute(1)]]; float4 in_offs [[attribute(2)]]; float2 in_uv [[attribute(3)]]; - float3 in_normal [[attribute(4)]]; + float3 in_normal [[attribute(7)]]; }; struct VertexOut From 269f7adc6bf7c9d5a53c0d88aa0eff43e14693c0 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 17 Jun 2025 14:17:31 -0400 Subject: [PATCH 44/48] Cleanup + Enable fast math Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_shaders.mm | 55 ++++++++++++++------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm index 1a9e5a855d..0c3f6d9c91 100644 --- a/core/rend/metal/metal_shaders.mm +++ b/core/rend/metal/metal_shaders.mm @@ -624,9 +624,9 @@ fragment float4 fs_main(VertexOut in [[stage_in]], int bump_id1; }; -float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) +void w_divide(thread float4& vpos, float4x4 ndc_mat, thread VertexOut& out) { - float4 vpos = float4(in_vpos.xy / in_vpos.w, 1.0 / in_vpos.w, 1.0); + vpos = float4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); vpos = ndc_mat * vpos; if (pp_gouraud == 1) { if (is_flat) { @@ -640,10 +640,9 @@ float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) out.vtx_uv = float3(out.vtx_uv.xy * vpos.z, vpos.z); vpos.w = 1.0; vpos.z = 0.0; - return vpos; } -void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Lights& n2_lights, +void compute_colors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Lights& n2_lights, thread float4& base_col, thread float4& offset_col, int vol_idx, float3 position, float3 normal) { if (n2_uniforms.constant_color[vol_idx] == 1) @@ -741,25 +740,16 @@ void computeColors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Ligh offset_col = clamp(offset_col, 0.0, 
1.0); } -float2 computeEnvMap(float2 uv, float3 position, float3 normal) +void compute_env_map(thread float3& uv, float3 position, float3 normal) { // Spherical mapping // float3 r = reflect(normalize(position), normal); // float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0)); - // uv += r.xy / m + 0.5; + // uv.xy += r.xy / m + 0.5; // Cheap env mapping - uv += (normal.xy / 2.0) + 0.5; - uv = clamp(uv, 0.0, 1.0); - - return uv; -} - -void computeBumpMap(constant N2Lights& n2_lights) -{ - // TODO - // if (n2_lights.bump_id0 == -1) - return; + uv.xy += (normal.xy / 2.0) + 0.5; + uv.xy = clamp(uv.xy, 0.0, 1.0); } vertex VertexOut vs_main(N2VertexIn in [[stage_in]], @@ -778,7 +768,7 @@ vertex VertexOut vs_main(N2VertexIn in [[stage_in]], // TODO bump mapping if (n2_uniforms.bump_mapping == 0) { - computeColors(n2_uniforms, n2_lights, + compute_colors(n2_uniforms, n2_lights, base, offset, 0, vpos.xyz, vnorm); base += offset; } @@ -796,10 +786,10 @@ vertex VertexOut vs_main(N2VertexIn in [[stage_in]], out.vtx_uv.xy = in.in_uv; if (n2_uniforms.env_mapping[0] == 1) - out.vtx_uv.xy = computeEnvMap(out.vtx_uv.xy, vpos.xyz, vnorm); + compute_env_map(out.vtx_uv, vpos.xyz, vnorm); vpos = n2_uniforms.proj_mat * vpos; - vpos = w_divide(vpos, uniforms.ndc_mat, out); + w_divide(vpos, uniforms.ndc_mat, out); out.position = vpos; return out; @@ -840,14 +830,13 @@ vertex VertexOut vs_main(N2VertexIn in [[stage_in]], float4 position [[position]]; }; -float4 w_divide(float4 in_vpos, float4x4 ndc_mat, thread VertexOut& out) +void w_divide(thread float4& vpos, float4x4 ndc_mat, thread VertexOut& out) { - float4 vpos = float4(in_vpos.xy / in_vpos.w, 1.0 / in_vpos.w, 1.0); + vpos = float4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); vpos = ndc_mat * vpos; out.depth = vpos.z; vpos.w = 1.0; vpos.z = 0.0; - return vpos; } vertex VertexOut vs_main(VertexIn in [[stage_in]], @@ -859,7 +848,8 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], float4 vpos = n2_uniforms.mv_mat * 
in.in_pos; vpos.z = min(vpos.z, -0.001); vpos = n2_uniforms.proj_mat * vpos; - vpos = w_divide(vpos, uniforms.ndc_mat, out); + w_divide(vpos, uniforms.ndc_mat, out); + out.position = vpos; return out; @@ -869,8 +859,11 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], MetalShaders::MetalShaders() { auto device = MetalContext::Instance()->GetDevice(); + MTLCompileOptions* compileOptions = [[MTLCompileOptions alloc] init]; + [compileOptions setFastMathEnabled:YES]; + NSError* error = nil; - fragmentShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:FragmentShaderSource] options:nil error:&error]; + fragmentShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:FragmentShaderSource] options:compileOptions error:&error]; fragmentShaderConstants = [[MTLFunctionConstantValues alloc] init]; if (!fragmentShaderLibrary) { @@ -878,7 +871,7 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], assert(false); } - vertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:VertexShaderSource] options:nil error:&error]; + vertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:VertexShaderSource] options:compileOptions error:&error]; vertexShaderConstants = [[MTLFunctionConstantValues alloc] init]; if (!vertexShaderLibrary) { @@ -886,14 +879,14 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], assert(false); } - n2VertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:N2VertexShaderSource] options:nil error:&error]; + n2VertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:N2VertexShaderSource] options:compileOptions error:&error]; if (!n2VertexShaderLibrary) { ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); assert(false); } - blitShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:BlitShader] options:nil error:&error]; + blitShaderLibrary = [device newLibraryWithSource:[NSString 
stringWithUTF8String:BlitShader] options:compileOptions error:&error]; if (!blitShaderLibrary) { ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); @@ -914,7 +907,7 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], assert(false); } - modVolShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:ModVolShaderSource] options:nil error:&error]; + modVolShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:ModVolShaderSource] options:compileOptions error:&error]; modVolShaderConstants = [[MTLFunctionConstantValues alloc] init]; if (!modVolShaderLibrary) { @@ -922,14 +915,14 @@ vertex VertexOut vs_main(VertexIn in [[stage_in]], assert(false); } - n2ModVolVertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:MTLN2ModVolVertexShaderSource] options:nil error:&error]; + n2ModVolVertexShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:MTLN2ModVolVertexShaderSource] options:compileOptions error:&error]; if (!n2ModVolVertexShaderLibrary) { ERROR_LOG(RENDERER, "%s", [[error localizedDescription] UTF8String]); assert(false); } - quadShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:QuadShaderSource] options:nil error:&error]; + quadShaderLibrary = [device newLibraryWithSource:[NSString stringWithUTF8String:QuadShaderSource] options:compileOptions error:&error]; quadShaderConstants = [[MTLFunctionConstantValues alloc] init]; if (!quadShaderLibrary) { From 8b6b132adc2e517c69ae960970a88dea6d60ea9d Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 17 Jun 2025 15:01:14 -0400 Subject: [PATCH 45/48] Fix cull mode and winding Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_drawer.h | 6 +++++ core/rend/metal/metal_drawer.mm | 33 +++++++++++++++---------- core/rend/metal/metal_pipeline.h | 40 +++++++++++++++---------------- core/rend/metal/metal_pipeline.mm | 16 ++++++------- 4 files changed, 54 insertions(+), 41 deletions(-) diff 
--git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h index b9f463a77b..e8c906e00e 100644 --- a/core/rend/metal/metal_drawer.h +++ b/core/rend/metal/metal_drawer.h @@ -174,6 +174,12 @@ class MetalBaseDrawer return offset; } + MTLCullMode toMetalCullMode(int cullMode) { + return cullMode == 3 ? MTLCullModeBack + : cullMode == 2 ? MTLCullModeFront + : MTLCullModeNone; + } + MTLScissorRect baseScissor {}; MTLScissorRect currentScissor {}; TransformMatrix matrices; diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index e322bc579b..b99d61a3d8 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -133,6 +133,7 @@ [encoder setRenderPipelineState:pipelineManager->GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; [encoder setDepthStencilState:pipelineManager->GetDepthStencilStates(listType, sortTriangles, shadowed, poly)]; + [encoder setCullMode:toMetalCullMode(poly.isp.CullMode)]; if (shadowed) { if (poly.pcw.Shadow != 0) { @@ -209,10 +210,14 @@ const PolyParam& polyParam = pvrrc.global_param_tr[param.polyIndex]; if (polyParam.isp.ZWriteDis) continue; - [encoder setRenderPipelineState:pipelineManager->GetDepthPassPipeline(polyParam.isp.CullMode, polyParam.isNaomi2())]; - [encoder setDepthStencilState:pipelineManager->GetDepthPassDepthStencilStates(polyParam.isp.CullMode, polyParam.isNaomi2())]; + [encoder setRenderPipelineState:pipelineManager->GetDepthPassPipeline(polyParam.isNaomi2())]; + [encoder setDepthStencilState:pipelineManager->GetDepthPassDepthStencilStates(polyParam.isNaomi2())]; + MTLScissorRect scissorRect {}; SetTileClip(encoder, polyParam.tileclip, scissorRect); + + [encoder setCullMode:toMetalCullMode(polyParam.isp.CullMode)]; + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle indexCount:param.count indexType:MTLIndexTypeUInt32 @@ -258,9 +263,6 @@ for (int cmv = 0; cmv < count; cmv++) { ModifierVolumeParam& param = params[cmv]; - MTLCullMode cull_mode = 
param.isp.CullMode == 3 ? MTLCullModeBack : param.isp.CullMode == 2 ? MTLCullModeFront : MTLCullModeNone; - [encoder setCullMode:cull_mode]; - [encoder setFrontFacingWinding:MTLWindingCounterClockwise]; if (param.count == 0) continue; @@ -271,15 +273,16 @@ mod_base = param.first; if (!param.isp.VolumeLast && mv_mode > 0) { - state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); // OR'ing (open volume or quad) - depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isNaomi2()); // OR'ing (open volume or quad) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Or, param.isNaomi2()); } else { - state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); // XOR'ing (closed volume) - depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isNaomi2()); // XOR'ing (closed volume) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Xor, param.isNaomi2()); } [encoder setRenderPipelineState:state]; [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(param.isp.CullMode)]; [encoder setStencilReferenceValue:2]; MTLScissorRect scissorRect {}; SetTileClip(encoder, param.tileclip, scissorRect); @@ -292,10 +295,11 @@ if (mv_mode == 1 || mv_mode == 2) { // Sum the area - state = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); - depth_state = pipelineManager->GetModVolDepthStencilStates(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); + state = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? 
ModVolMode::Inclusion : ModVolMode::Exclusion, param.isNaomi2()); + depth_state = pipelineManager->GetModVolDepthStencilStates(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isNaomi2()); [encoder setRenderPipelineState:state]; [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(param.isp.CullMode)]; [encoder setStencilReferenceValue:1]; [encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart: mod_base * 3 @@ -305,10 +309,11 @@ } [encoder setVertexBufferOffset:0 atIndex:30]; - state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, 0, false); - depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Final, 0, false); + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, false); + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Final, false); [encoder setRenderPipelineState:state]; [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(0)]; [encoder setStencilReferenceValue:0x81]; [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangleStrip indexCount:4 @@ -396,6 +401,8 @@ MetalVertexShaderUniforms vtxUniforms {}; vtxUniforms.ndcMat = matrices.GetNormalMatrix(); + [renderEncoder setFrontFacingWinding:MTLWindingCounterClockwise]; + UploadMainBuffer(vtxUniforms, fragUniforms); [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; diff --git a/core/rend/metal/metal_pipeline.h b/core/rend/metal/metal_pipeline.h index df7dcfebe3..9869d25c5b 100644 --- a/core/rend/metal/metal_pipeline.h +++ b/core/rend/metal/metal_pipeline.h @@ -53,24 +53,24 @@ class MetalPipelineManager return blitPassPipeline; } - id GetDepthPassPipeline(int cullMode, bool naomi2) + id GetDepthPassPipeline(bool naomi2) { - u32 pipehash = hash(cullMode, naomi2); + u32 pipehash = hash(naomi2); const auto &pipeline = depthPassPipelines.find(pipehash); if (pipeline != depthPassPipelines.end() && pipeline->second != nullptr) return 
pipeline->second; - CreateDepthPassPipeline(cullMode, naomi2); + CreateDepthPassPipeline(naomi2); return depthPassPipelines[pipehash]; } - id GetModifierVolumePipeline(ModVolMode mode, int cullMode, bool naomi2) + id GetModifierVolumePipeline(ModVolMode mode, bool naomi2) { - u32 pipehash = hash(mode, cullMode, naomi2); + u32 pipehash = hash(mode, naomi2); const auto &pipeline = modVolPipelines.find(pipehash); if (pipeline != modVolPipelines.end() && pipeline->second != nullptr) return pipeline->second; - CreateModVolPipeline(mode, cullMode, naomi2); + CreateModVolPipeline(mode, naomi2); return modVolPipelines[pipehash]; } @@ -86,24 +86,24 @@ class MetalPipelineManager return pipelines[pipehash]; } - id GetModVolDepthStencilStates(ModVolMode mode, int cullMode, bool naomi2) + id GetModVolDepthStencilStates(ModVolMode mode, bool naomi2) { - u32 pipehash = hash(mode, cullMode, naomi2); + u32 pipehash = hash(mode, naomi2); const auto &state = modVolStencilStates.find(pipehash); if (state != modVolStencilStates.end() && state->second != nullptr) return state->second; - CreateModVolDepthStencilState(mode, cullMode, naomi2); + CreateModVolDepthStencilState(mode, naomi2); return modVolStencilStates[pipehash]; } - id GetDepthPassDepthStencilStates(int cullMode, bool naomi2) + id GetDepthPassDepthStencilStates(bool naomi2) { - u32 pipehash = hash(cullMode, naomi2); + u32 pipehash = hash(naomi2); const auto &state = depthPassDepthStencilStates.find(pipehash); if (state != depthPassDepthStencilStates.end() && state->second != nullptr) return state->second; - CreateDepthPassDepthStencilState(cullMode, naomi2); + CreateDepthPassDepthStencilState(naomi2); return depthPassDepthStencilStates[pipehash]; } @@ -122,12 +122,12 @@ class MetalPipelineManager private: void CreateBlitPassPipeline(); - void CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2); - void CreateDepthPassPipeline(int cullMode, bool naomi2); + void CreateModVolPipeline(ModVolMode mode, bool naomi2); + 
void CreateDepthPassPipeline(bool naomi2); void CreatePipeline(u32 listType, bool sortTriangles, const PolyParam& pp, int gpuPalette, bool dithering); - void CreateModVolDepthStencilState(ModVolMode mode, int cullMode, bool naomi2); - void CreateDepthPassDepthStencilState(int cullMode, bool naomi2); + void CreateModVolDepthStencilState(ModVolMode mode, bool naomi2); + void CreateDepthPassDepthStencilState(bool naomi2); void CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam& pp); u64 hash(u32 listType, bool sortTriangles, const PolyParam *pp, int gpuPalette, bool dithering) const @@ -163,13 +163,13 @@ class MetalPipelineManager return hash; } - u32 hash(ModVolMode mode, int cullMode, bool naomi2) const + u32 hash(ModVolMode mode, bool naomi2) const { - return ((int)mode << 2) | cullMode | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6); + return ((int)mode << 2) | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6); } - u32 hash(int cullMode, bool naomi2) const + u32 hash(bool naomi2) const { - return cullMode | ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3); + return ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3); } MTLVertexDescriptor* GetMainVertexInputDescriptor(bool full = true, bool naomi2 = false) const diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm index c3671bac8a..db1ed72371 100644 --- a/core/rend/metal/metal_pipeline.mm +++ b/core/rend/metal/metal_pipeline.mm @@ -46,7 +46,7 @@ blitPassPipeline = state; } -void MetalPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2) { +void MetalPipelineManager::CreateModVolPipeline(ModVolMode mode, bool naomi2) { MTLVertexDescriptor *vertexDesc = nil; MTLRenderPipelineDescriptor *descriptor = 
[[MTLRenderPipelineDescriptor alloc] init]; @@ -94,10 +94,10 @@ ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); } - modVolPipelines[hash(mode, cullMode, naomi2)] = state; + modVolPipelines[hash(mode, naomi2)] = state; } -void MetalPipelineManager::CreateDepthPassPipeline(int cullMode, bool naomi2) +void MetalPipelineManager::CreateDepthPassPipeline(bool naomi2) { MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; [descriptor setLabel:@"Depth Pass"]; @@ -124,7 +124,7 @@ ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); } - depthPassPipelines[hash(cullMode, naomi2)] = state; + depthPassPipelines[hash(naomi2)] = state; } void MetalPipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const PolyParam &pp, int gpuPalette, bool dithering) { @@ -186,7 +186,7 @@ pipelines[hash(listType, sortTriangles, &pp, gpuPalette, dithering)] = state; } -void MetalPipelineManager::CreateModVolDepthStencilState(ModVolMode mode, int cullMode, bool naomi2) { +void MetalPipelineManager::CreateModVolDepthStencilState(ModVolMode mode, bool naomi2) { MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; [descriptor setDepthWriteEnabled:false]; [descriptor setDepthCompareFunction:mode == ModVolMode::Xor || mode == ModVolMode::Or ? 
MTLCompareFunctionGreater : MTLCompareFunctionAlways]; @@ -241,10 +241,10 @@ auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; - modVolStencilStates[hash(mode, cullMode, naomi2)] = state; + modVolStencilStates[hash(mode, naomi2)] = state; } -void MetalPipelineManager::CreateDepthPassDepthStencilState(int cullMode, bool naomi2) { +void MetalPipelineManager::CreateDepthPassDepthStencilState(bool naomi2) { MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; [descriptor setLabel:@"Sorted Depth Pass"]; [descriptor setDepthWriteEnabled:true]; @@ -252,7 +252,7 @@ auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; - depthPassDepthStencilStates[hash(cullMode, naomi2)] = state; + depthPassDepthStencilStates[hash(naomi2)] = state; } void MetalPipelineManager::CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam &pp) { From 12a318a7962c3d3a6b9bea15c5a0e132359947ba Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 18 Jun 2025 14:43:44 -0400 Subject: [PATCH 46/48] Command Buffer Restructure + RTT Fixes Signed-off-by: Isaac Marovitz --- CMakeLists.txt | 3 + core/rend/metal/metal.h | 52 +++++++++++++++ core/rend/metal/metal_commandpool.h | 53 +++++++++++++++ core/rend/metal/metal_commandpool.mm | 98 ++++++++++++++++++++++++++++ core/rend/metal/metal_drawer.h | 32 +++++++-- core/rend/metal/metal_drawer.mm | 53 ++++++++++++--- core/rend/metal/metal_renderer.h | 6 +- core/rend/metal/metal_renderer.mm | 57 ++++++++++------ core/rend/metal/metal_texture.h | 79 +++++++++++++++++++++- core/rend/metal/metal_texture.mm | 92 ++++++++++++++++++++++++-- 10 files changed, 481 insertions(+), 44 deletions(-) create mode 100644 core/rend/metal/metal.h create mode 100644 core/rend/metal/metal_commandpool.h create mode 100644 core/rend/metal/metal_commandpool.mm diff --git a/CMakeLists.txt b/CMakeLists.txt index 
615a2e62ff..658f345361 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1548,6 +1548,9 @@ if(APPLE AND USE_METAL) core/rend/metal/metal_drawer.mm core/rend/metal/metal_quad.h core/rend/metal/metal_quad.mm + core/rend/metal/metal_commandpool.h + core/rend/metal/metal_commandpool.mm + core/rend/metal/metal.h core/rend/metal/metal_driver.h core/deps/imgui/backends/imgui_impl_metal.h core/deps/imgui/backends/imgui_impl_metal.mm) diff --git a/core/rend/metal/metal.h b/core/rend/metal/metal.h new file mode 100644 index 0000000000..0223cdd55b --- /dev/null +++ b/core/rend/metal/metal.h @@ -0,0 +1,52 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once + +#include "types.h" + +class MetalDeletable +{ +public: + virtual ~MetalDeletable() = default; +}; + +class MetalFlightManager +{ +public: + virtual void addToFlight(MetalDeletable *object) = 0; + + virtual ~MetalFlightManager() = default; +}; + +template +class MetalDeleter : public MetalDeletable +{ +public: + MetalDeleter() = delete; + explicit MetalDeleter(T& o) : o(o) {} + MetalDeleter(T&& o) : o(std::move(o)) {} + ~MetalDeleter() override { + if constexpr (std::is_pointer_v) + delete o; + } + +private: + T o; +}; \ No newline at end of file diff --git a/core/rend/metal/metal_commandpool.h b/core/rend/metal/metal_commandpool.h new file mode 100644 index 0000000000..94513a4199 --- /dev/null +++ b/core/rend/metal/metal_commandpool.h @@ -0,0 +1,53 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once +#include "metal.h" + +#include +#include + +class MetalCommandPool : public MetalFlightManager +{ +public: + void Init(size_t chainSize = 3); + void Term(); + void BeginFrame(); + void EndFrame(); + void EndFrameAndWait(); + id Allocate(); + + int GetIndex() const { + return index; + } + + void addToFlight(MetalDeletable *object) override { + inFlightObjects[index].emplace_back(object); + } + +private: + int index = 0; + std::vector> inFlightBuffers; + std::vector> events; + size_t chainSize; + std::vector>> inFlightObjects; + bool frameStarted = false; + id queue = nil; + id device = nil; +}; diff --git a/core/rend/metal/metal_commandpool.mm b/core/rend/metal/metal_commandpool.mm new file mode 100644 index 0000000000..72008b651c --- /dev/null +++ b/core/rend/metal/metal_commandpool.mm @@ -0,0 +1,98 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#include "metal_commandpool.h" +#import "metal_context.h" + +void MetalCommandPool::Init(size_t chainSize) +{ + this->chainSize = chainSize; + device = MetalContext::Instance()->GetDevice(); + + queue = [device newCommandQueue]; + + INFO_LOG(RENDERER, "MetalCommandPool::Init chainSize=%zu", chainSize); /* chainSize is a size_t: %zu, and this is informational, not an error */ + if (events.size() > chainSize) + { + events.resize(chainSize); + } + else + { + while (events.size() < chainSize) + { + events.push_back(nil); + } + } + + inFlightBuffers.resize(chainSize); + inFlightObjects.resize(chainSize); +} + +void MetalCommandPool::Term() +{ + for (id event : events) + { + if (event != nil) + [event waitUntilSignaledValue:1 timeoutMS:UINT64_MAX]; + } + inFlightObjects.clear(); + inFlightBuffers.clear(); + events.clear(); +} + +void MetalCommandPool::BeginFrame() +{ + if (frameStarted) + return; + frameStarted = true; + index = (index + 1) % chainSize; + if (events[index] != nil) + [events[index] waitUntilSignaledValue:1 timeoutMS:UINT64_MAX]; + inFlightBuffers[index] = nil; + inFlightObjects[index].clear(); +} + +void MetalCommandPool::EndFrame() +{ + if (!frameStarted) + return; + frameStarted = false; + events[index] = inFlightBuffers[index] != nil ? [device newSharedEvent] : nil; /* no command buffer allocated this frame: nothing would ever signal a new event, so leave the slot empty instead of blocking the next wait forever */ + [inFlightBuffers[index] encodeSignalEvent:events[index] value:1]; + [inFlightBuffers[index] commit]; +} + +id MetalCommandPool::Allocate() +{ + verify(frameStarted); + if (inFlightBuffers[index] == nil) + inFlightBuffers[index] = [queue commandBuffer]; + return inFlightBuffers[index]; +} + +void MetalCommandPool::EndFrameAndWait() +{ + EndFrame(); + for (id event : events) + { + if (event != nil) + [event waitUntilSignaledValue:1 timeoutMS:UINT64_MAX]; + } + inFlightObjects[index].clear(); +} \ No newline at end of file diff --git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h index e8c906e00e..7de2982880 100644 --- a/core/rend/metal/metal_drawer.h +++ b/core/rend/metal/metal_drawer.h @@ -23,6 +23,7 @@ #include "metal_shaders.h" #include "metal_pipeline.h" #include
"metal_buffer.h" +#include "metal_commandpool.h" #include "rend/tileclip.h" #include "rend/transform_matrix.h" #include "rend/sorter.h" @@ -31,6 +32,9 @@ class MetalBaseDrawer { +public: + void SetCommandPool(MetalCommandPool *commandPool) { this->commandPool = commandPool; } + protected: TileClipping SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect); void SetBaseScissor(MTLViewport viewport); @@ -55,12 +59,12 @@ class MetalBaseDrawer buffer = mainBuffers.back().release(); mainBuffers.pop_back(); if (buffer->bufferSize < size) { + commandPool->addToFlight(new MetalDeleter(buffer)); u32 newSize = (u32)buffer->bufferSize; while (newSize < size) newSize *= 2; - INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", buffer->bufferSize, newSize); - [buffer->buffer setPurgeableState: MTLPurgeableStateEmpty]; + delete buffer; buffer = new MetalBufferData(newSize); } @@ -70,6 +74,21 @@ class MetalBaseDrawer buffer = new MetalBufferData(std::max(512 * 1024u, size)); } + class BufferHolder : public MetalDeletable + { + public: + BufferHolder(MetalBufferData *buffer, MetalBaseDrawer *drawer) : buffer(buffer), drawer(drawer) {} + + ~BufferHolder() override { + drawer->mainBuffers.emplace_back(buffer); + } + + private: + MetalBufferData *buffer; + MetalBaseDrawer *drawer; + }; + commandPool->addToFlight(new BufferHolder(buffer, this)); + return buffer; } @@ -183,6 +202,7 @@ class MetalBaseDrawer MTLScissorRect baseScissor {}; MTLScissorRect currentScissor {}; TransformMatrix matrices; + MetalCommandPool *commandPool = nullptr; std::vector> mainBuffers; }; @@ -191,7 +211,7 @@ class MetalDrawer : public MetalBaseDrawer public: virtual ~MetalDrawer() = default; - bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture, id commandBuffer); + bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture); virtual void EndRenderPass() { renderPassStarted = false; } @@ -201,7 +221,7 @@ class MetalDrawer : public MetalBaseDrawer } 
protected: - virtual id BeginRenderPass(id commandBuffer) = 0; + virtual id BeginRenderPass() = 0; void Init(MetalSamplers *samplers, MetalPipelineManager pipelineManager) { this->samplers = samplers; this->pipelineManager = std::make_unique(pipelineManager); @@ -256,7 +276,7 @@ class MetalScreenDrawer : public MetalDrawer } protected: - id BeginRenderPass(id commandBuffer) override; + id BeginRenderPass() override; private: std::vector> framebuffers; @@ -279,7 +299,7 @@ class MetalTextureDrawer : public MetalDrawer void EndRenderPass() override; protected: - id BeginRenderPass(id commandBuffer) override; + id BeginRenderPass() override; private: u32 width = 0; diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index b99d61a3d8..4faa6fef83 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -144,7 +144,7 @@ } if (poly.texture != nullptr) { - auto texture = ((MetalTexture *)poly.texture)->GetTexture(); + auto texture = ((MetalTexture *)poly.texture)->GetReadOnlyTexture(); [encoder setFragmentTexture:texture atIndex:0]; // Texture sampler @@ -344,12 +344,12 @@ offsets.lightsOffset = packNaomi2Lights(packer); } - MetalBufferData *buffer = GetMainBuffer(packer.size()); + MetalBufferData *buffer = GetMainBuffer(packer.size()); /* pooled: handed back through the command pool's in-flight list instead of leaking a raw new per upload */ packer.upload(*buffer); curMainBuffer = buffer->buffer; } -bool MetalDrawer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture, id commandBuffer) { +bool MetalDrawer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { MetalFragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; if (dithering) { @@ -375,7 +375,7 @@ currentScissor = MTLScissorRect {}; @autoreleasepool { - id renderEncoder = BeginRenderPass(commandBuffer); + id renderEncoder = BeginRenderPass(); [renderEncoder retain]; [renderEncoder
setFragmentTexture:fogTexture->GetTexture() atIndex:2]; @@ -447,7 +447,7 @@ rttPassDescriptor = [[MTLRenderPassDescriptor alloc] init]; } -id MetalTextureDrawer::BeginRenderPass(id commandBuffer) { +id MetalTextureDrawer::BeginRenderPass() { DEBUG_LOG(RENDERER, "RenderToTexture packmode=%d stride=%d - %d x %d @ %06x", pvrrc.fb_W_CTRL.fb_packmode, pvrrc.fb_W_LINESTRIDE * 8, pvrrc.fb_X_CLIP.max + 1, pvrrc.fb_Y_CLIP.max + 1, pvrrc.fb_W_SOF1 & VRAM_MASK); matrices.CalcMatrices(&pvrrc); @@ -461,6 +461,8 @@ u32 heightPow2; getRenderToTextureDimensions(upscaledWidth, upscaledHeight, widthPow2, heightPow2); + id commandBuffer = commandPool->Allocate(); + if (!depthAttachment || widthPow2 > depthAttachment.width || heightPow2 > depthAttachment.height) { MTLTextureDescriptor *depthDescriptor = [[MTLTextureDescriptor alloc] init]; @@ -479,6 +481,12 @@ if (!config::RenderToTextureBuffer) { texture = textureCache->getRTTexture(textureAddr, pvrrc.fb_W_CTRL.fb_packmode, origWidth, origHeight); + if (textureCache->IsInFlight(texture, false)) + { + texture->CreateReadOnlyCopy(commandBuffer); + texture->deferDeleteResource(commandPool); + } + textureCache->SetInFlight(texture); // Check if we need to recreate the texture bool needsRecreation = !texture->GetTexture() || @@ -598,8 +606,32 @@ this->viewport.originY != viewport.originY || this->viewport.zfar != viewport.zfar || this->viewport.znear != viewport.znear) { + if (!framebuffers.empty()) { + verify(commandPool != nullptr); + commandPool->addToFlight(new MetalDeleter(std::move(framebuffers))); + } + if (depthAttachment) { + class ResourceDeleter : public MetalDeletable + { + public: + ResourceDeleter(id texture) + { + std::swap(this->texture, texture); + } + + ~ResourceDeleter() override { + [texture setPurgeableState:MTLPurgeableStateEmpty]; + texture = nil; + } + + private: + id texture = nil; + }; + + commandPool->addToFlight(new ResourceDeleter(depthAttachment)); + } + depthAttachment = nil; - framebuffers.clear(); 
clearPassDescriptors.clear(); loadPassDescriptors.clear(); clearNeeded.clear(); @@ -667,11 +699,11 @@ MetalDrawer::Init(samplers, MetalPipelineManager(shaderManager)); } -id MetalScreenDrawer::BeginRenderPass(id commandBuffer) { +id MetalScreenDrawer::BeginRenderPass() { if (!renderPassStarted) { frameRendered = false; - + id commandBuffer = commandPool->Allocate(); MTLRenderPassDescriptor* passDescriptor = clearNeeded[GetCurrentImage()] || pvrrc.clearFramebuffer ? clearPassDescriptors[GetCurrentImage()] : loadPassDescriptors[GetCurrentImage()]; clearNeeded[GetCurrentImage()] = false; currentEncoder = [commandBuffer renderCommandEncoderWithDescriptor:passDescriptor]; @@ -697,13 +729,14 @@ if (emulateFramebuffer) { - + // TODO: scaleAndWriteFramebuffer } else { + aspectRatio = getOutputFramebufferAspectRatio(); } + commandPool->EndFrame(); MetalDrawer::EndRenderPass(); - frameRendered = true; } \ No newline at end of file diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h index 8f2bfa9ac9..3673a3aa27 100644 --- a/core/rend/metal/metal_renderer.h +++ b/core/rend/metal/metal_renderer.h @@ -23,6 +23,7 @@ #include "metal_texture.h" #include "metal_buffer.h" #include "metal_drawer.h" +#include "metal_commandpool.h" #include "hw/pvr/Renderer_if.h" #include "rend/tileclip.h" @@ -31,7 +32,7 @@ class BaseMetalRenderer : public Renderer { protected: - bool BaseInit(id commandEncoder); + bool BaseInit(); public: void Term() override; @@ -63,7 +64,8 @@ class BaseMetalRenderer : public Renderer MetalShaders shaderManager; std::unique_ptr fogTexture; std::unique_ptr paletteTexture; - id commandBuffer = nil; + MetalCommandPool texCommandPool; + MetalCommandPool fbCommandPool; id texCommandBuffer = nil; std::vector> framebufferTextures; int framebufferTexIndex = 0; diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm index 3be4571b4a..c30ccf89c7 100644 --- a/core/rend/metal/metal_renderer.mm +++ 
b/core/rend/metal/metal_renderer.mm @@ -24,8 +24,11 @@ #include "hw/aica/dsp.h" #include "hw/pvr/ta.h" -bool BaseMetalRenderer::BaseInit(id commandEncoder) +bool BaseMetalRenderer::BaseInit() { + texCommandPool.Init(); + fbCommandPool.Init(); + return true; } @@ -33,6 +36,8 @@ { WaitIdle(); MetalContext::Instance()->PresentFrame(nil, MTLViewport {}, 0); + texCommandPool.Term(); + fbCommandPool.Term(); textureCache.Clear(); fogTexture = nil; paletteTexture = nil; @@ -48,8 +53,9 @@ if (tf->NeedsUpdate()) { tf->SetCommandBuffer(texCommandBuffer); - if (!tf->Update()) { - tf = nullptr; + if (!tf->Update()) + { + tf->SetCommandBuffer(nil); return nullptr; } } @@ -58,6 +64,7 @@ tf->SetCommandBuffer(texCommandBuffer); } tf->SetCommandBuffer(nil); + textureCache.SetInFlight(tf); return tf; } @@ -74,20 +81,24 @@ resetTextureCache = false; } - texCommandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; + texCommandPool.BeginFrame(); + textureCache.SetCurrentIndex(texCommandPool.GetIndex()); + textureCache.Cleanup(); + + texCommandBuffer = texCommandPool.Allocate(); ta_parse(ctx, true); // TODO can't update fog or palette twice in multi render CheckFogTexture(); CheckPaletteTexture(); - [texCommandBuffer commit]; texCommandBuffer = nil; } void BaseMetalRenderer::ReInitOSD() { - + texCommandPool.Init(); + fbCommandPool.Init(); } void BaseMetalRenderer::RenderFramebuffer(const FramebufferInfo &info) @@ -103,6 +114,11 @@ curTexture = std::make_unique(); curTexture->tex_type = TextureType::_8888; } + + fbCommandPool.BeginFrame(); + id fbCommandBuffer = fbCommandPool.Allocate(); + + curTexture->SetCommandBuffer(fbCommandBuffer); if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) { // Video output disabled @@ -118,15 +134,17 @@ curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); } - + curTexture->SetCommandBuffer(nil); + fbCommandBuffer = nil; + fbCommandPool.EndFrame(); framebufferRendered = true; clearLastFrame = false; } void 
BaseMetalRenderer::WaitIdle() { - [commandBuffer waitUntilCompleted]; - commandBuffer = nil; +// [commandBuffer waitUntilCompleted]; +// commandBuffer = nil; } void BaseMetalRenderer::CheckFogTexture() { @@ -180,13 +198,16 @@ class MetalRenderer final : public BaseMetalRenderer { public: - bool Init() + bool Init() override { NOTICE_LOG(RENDERER, "MetalRenderer::Init"); textureDrawer.Init(&samplerManager, &shaderManager, &textureCache); + textureDrawer.SetCommandPool(&texCommandPool); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); - // BaseInit(screenDrawer.GetRenderPass()); + screenDrawer.SetCommandPool(&texCommandPool); + BaseInit(); emulateFramebuffer = config::EmulateFramebuffer; return true; @@ -196,6 +217,7 @@ void Term() { NOTICE_LOG(RENDERER, "MetalRenderer::Term"); WaitIdle(); + texCommandPool.Term(); screenDrawer.Term(); textureDrawer.Term(); samplerManager.term(); @@ -221,8 +243,6 @@ void Process(TA_context* ctx) override bool Render() override { - id commandBuffer = [MetalContext::Instance()->GetQueue() commandBuffer]; - MetalDrawer *drawer; if (pvrrc.isRTT) drawer = &textureDrawer; @@ -231,13 +251,10 @@ bool Render() override drawer = &screenDrawer; } - drawer->Draw(fogTexture.get(), paletteTexture.get(), commandBuffer); - // TODO: ENABLE LATER WHEN WE CAN - //if (config::EmulateFramebuffer || pvrrc.isRTT) + drawer->Draw(fogTexture.get(), paletteTexture.get()); + if (config::EmulateFramebuffer || pvrrc.isRTT) // delay ending the render pass in case of multi render - drawer->EndRenderPass(); - - [commandBuffer commit]; + drawer->EndRenderPass(); return !pvrrc.isRTT; } @@ -258,7 +275,7 @@ void resize(int w, int h) override if ((u32)w == viewport.width && (u32)h == viewport.height) return; BaseMetalRenderer::resize(w, h); - WaitIdle(); + // WaitIdle(); screenDrawer.Init(&samplerManager, &shaderManager, viewport); } diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index 7b1b0a530a..650d61c416 100644 --- 
a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -20,7 +20,9 @@ #pragma once #include "rend/TexCache.h" #include "metal_context.h" -#include +#include "metal.h" + +#include #include class MetalTexture final : public BaseTextureCacheData @@ -37,7 +39,12 @@ class MetalTexture final : public BaseTextureCacheData this->width = width; this->height = height; } - bool Delete() override; + void SetInFlight(bool inFlight) { + this->isInFlight = inFlight; + } + void deferDeleteResource(MetalFlightManager *manager); + id GetReadOnlyTexture() const { return readOnlyTexture ? readOnlyTexture : texture; } + void CreateReadOnlyCopy(id commandBuffer); private: void Init(u32 width, u32 height, MTLPixelFormat format, u32 dataSize, bool mipmapped, bool mipmapsIncluded); @@ -50,6 +57,10 @@ class MetalTexture final : public BaseTextureCacheData u32 mipmapLevels = 1; id commandBuffer = nil; id texture = nil; + id readOnlyTexture = nil; + bool isInFlight = false; + + friend class MetalTextureCache; }; class MetalSamplers @@ -122,5 +133,69 @@ class MetalSamplers class MetalTextureCache final : public BaseTextureCache { +public: + MetalTextureCache() {} + + void SetCurrentIndex(int index) + { + if (index == (int)currentIndex) + return; + if (currentIndex < inFlightTextures.size()) + std::for_each(inFlightTextures[currentIndex].begin(), inFlightTextures[currentIndex].end(), + [](MetalTexture *texture) { + texture->SetInFlight(false); + texture->readOnlyTexture = nil; + }); + currentIndex = index; + EmptyTrash(inFlightTextures); + } + + bool IsInFlight(MetalTexture *texture, bool previous) + { + for (u32 i = 0; i < inFlightTextures.size(); i++) + if ((!previous || i != currentIndex) + && inFlightTextures[i].find(texture) != inFlightTextures[i].end()) + return true; + return false; + } + + void SetInFlight(MetalTexture *texture) + { + texture->SetInFlight(true); + inFlightTextures[currentIndex].insert(texture); + } + + void Cleanup(); + + void Clear() + { + for 
(auto& set : inFlightTextures) + { + for (MetalTexture *tex : set) + tex->SetInFlight(false); + set.clear(); + } + BaseTextureCache::Clear(); + } + +private: + bool clearTexture(MetalTexture *tex) + { + for (auto& set : inFlightTextures) + set.erase(tex); + + return tex->Delete(); + } + + template + void EmptyTrash(T& v) + { + if (v.size() < currentIndex + 1) + v.resize(currentIndex + 1); + else + v[currentIndex].clear(); + } + std::vector> inFlightTextures; + u32 currentIndex = ~0; }; \ No newline at end of file diff --git a/core/rend/metal/metal_texture.mm b/core/rend/metal/metal_texture.mm index 76c81dd801..091ba7fe7f 100644 --- a/core/rend/metal/metal_texture.mm +++ b/core/rend/metal/metal_texture.mm @@ -174,15 +174,99 @@ [blitEncoder endEncoding]; } -bool MetalTexture::Delete() +void MetalTexture::deferDeleteResource(MetalFlightManager *manager) { - [texture setPurgeableState:MTLPurgeableStateEmpty]; - texture = nil; + class ResourceDeleter : public MetalDeletable + { + public: + ResourceDeleter(MetalTexture *texture) + { + std::swap(this->texture, texture->texture); + std::swap(this->readOnlyTexture, texture->readOnlyTexture); + } + + ~ResourceDeleter() override { + [texture setPurgeableState:MTLPurgeableStateEmpty]; + texture = nil; + [readOnlyTexture setPurgeableState:MTLPurgeableStateEmpty]; + readOnlyTexture = nil; + } - return true; + private: + id texture = nil; + id readOnlyTexture; + }; + manager->addToFlight(new ResourceDeleter(this)); +} + +void MetalTexture::CreateReadOnlyCopy(id commandBuffer) +{ + if (!texture || readOnlyTexture) + return; + + MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; + [desc setWidth:texture.width]; + [desc setHeight:texture.height]; + [desc setPixelFormat:texture.pixelFormat]; + [desc setMipmapLevelCount:texture.mipmapLevelCount]; + [desc setStorageMode:MTLStorageModePrivate]; + [desc setUsage:MTLTextureUsageShaderRead]; + + readOnlyTexture = [texture.device newTextureWithDescriptor:desc]; + 
[readOnlyTexture setLabel:@"RTT Read-Only Copy"]; + + id blitEncoder = [commandBuffer blitCommandEncoder]; + [blitEncoder copyFromTexture:texture + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(texture.width, texture.height, 1) + toTexture:readOnlyTexture + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + + for (NSUInteger level = 1; level < texture.mipmapLevelCount; level++) { + NSUInteger mipWidth = MAX(texture.width >> level, 1); + NSUInteger mipHeight = MAX(texture.height >> level, 1); + + [blitEncoder copyFromTexture:texture + sourceSlice:0 + sourceLevel:level + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(mipWidth, mipHeight, 1) + toTexture:readOnlyTexture + destinationSlice:0 + destinationLevel:level + destinationOrigin:MTLOriginMake(0, 0, 0)]; + } + + [blitEncoder endEncoding]; } MetalSamplers::MetalSamplers() = default; MetalSamplers::~MetalSamplers() { term(); } + +void MetalTextureCache::Cleanup() +{ + std::vector list; + + u32 TargetFrame = std::max((u32)120, FrameCount) - 120; + + for (const auto& [id, texture] : cache) + { + if (texture.dirty && texture.dirty < TargetFrame) + list.push_back(id); + + if (list.size() > 5) + break; + } + + for (u64 id : list) + { + if (clearTexture(&cache[id])) + cache.erase(id); + } +} \ No newline at end of file From 6132a0679e5fb0eeed87d0a58a42299e5c089236 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Wed, 18 Jun 2025 14:43:55 -0400 Subject: [PATCH 47/48] Rtt Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_drawer.mm | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm index 4faa6fef83..f815bdb6bb 100644 --- a/core/rend/metal/metal_drawer.mm +++ b/core/rend/metal/metal_drawer.mm @@ -576,11 +576,8 @@ if (config::RenderToTextureBuffer) { + commandPool->EndFrameAndWait(); - } - - if (config::RenderToTextureBuffer) - { u16 
*dst = (u16 *)&vram[textureAddr]; PixelBuffer tmpBuf; @@ -590,7 +587,9 @@ } else { - + commandPool->EndFrame(); + texture->dirty = 0; + texture->unprotectVRam(); } MetalDrawer::EndRenderPass(); From 17eb7a554b88c24909dc0f04f0dd572b4981b033 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sat, 21 Jun 2025 15:40:32 -0400 Subject: [PATCH 48/48] Disable anisotropic filtering on linear and punchThrough Signed-off-by: Isaac Marovitz --- core/rend/metal/metal_texture.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/rend/metal/metal_texture.h b/core/rend/metal/metal_texture.h index 650d61c416..fc72813f38 100644 --- a/core/rend/metal/metal_texture.h +++ b/core/rend/metal/metal_texture.h @@ -117,7 +117,11 @@ class MetalSamplers [desc setTAddressMode:tRepeat]; [desc setRAddressMode:tRepeat]; [desc setCompareFunction:MTLCompareFunctionNever]; - [desc setMaxAnisotropy:config::AnisotropicFiltering]; + if (tsp.FilterMode == 1 && !punchThrough) { + [desc setMaxAnisotropy:config::AnisotropicFiltering]; + } else { + [desc setMaxAnisotropy:1]; + } sampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:desc];