diff --git a/CMakeLists.txt b/CMakeLists.txt index bfe4d505af..658f345361 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,10 @@ else() project(flycast) endif() +if(APPLE) + enable_language(OBJC OBJCXX) +endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD" OR NINTENDO_SWITCH) @@ -62,6 +66,7 @@ option(USE_HOST_SDL "Use host SDL library" ${USE_HOST_SDL_DEFAULT}) option(USE_HOST_LIBCHDR "Use host libchdr" OFF) option(USE_OPENMP "Use OpenMP if available" ON) option(USE_VULKAN "Build with Vulkan support" ON) +option(USE_METAL "Build with Metal support" ON) option(USE_DX9 "Build with Direct3D 9 support" ON) option(USE_DX11 "Build with Direct3D 11 support" ON) option(LIBRETRO "Build libretro core" OFF) @@ -71,7 +76,7 @@ option(USE_ALSA "Build with ALSA support" ON) option(USE_LIBAO "Build with AO support" ON) option(USE_OSS "Build with OSS support" OFF) option(USE_PULSEAUDIO "Build with PulseAudio support" ON) -option(USE_BREAKPAD "Build and link with breakpad library" ON) +option(USE_BREAKPAD "Build and link with breakpad library" OFF) option(USE_LUA "Build with Lua support" ON) option(ENABLE_GDB_SERVER "Build with GDB debugging support" OFF) option(ENABLE_DC_PROFILER "Build with support for target machine (SH4) profiler" OFF) @@ -83,6 +88,10 @@ if(IOS AND NOT LIBRETRO) set(USE_VULKAN OFF CACHE BOOL "Force vulkan off" FORCE) endif() +if(NOT APPLE) + set(USE_METAL OFF CACHE BOOL "Force metal off" FORCE) +endif() + include(GNUInstallDirs) include(CMakeRC) @@ -1337,8 +1346,15 @@ endif() target_sources(${PROJECT_NAME} PRIVATE core/wsi/context.h core/wsi/libretro.cpp - core/wsi/libretro.h + core/wsi/libretro.h) + +if(USE_METAL) + target_sources(${PROJECT_NAME} PRIVATE + core/wsi/switcher.mm) +else() + target_sources(${PROJECT_NAME} PRIVATE core/wsi/switcher.cpp) +endif() if(USE_OPENGL) target_compile_definitions(${PROJECT_NAME} PRIVATE USE_OPENGL) @@ -1511,6 +1527,35 @@ if(USE_VULKAN) endif() endif() 
+if(APPLE AND USE_METAL) + target_link_libraries(${PROJECT_NAME} PRIVATE "-framework Metal -framework QuartzCore -framework CoreGraphics") + + target_compile_definitions(${PROJECT_NAME} PRIVATE USE_METAL HAVE_METAL) + target_sources(${PROJECT_NAME} PRIVATE + core/rend/metal/metal_renderer.h + core/rend/metal/metal_renderer.mm + core/rend/metal/metal_context.h + core/rend/metal/metal_context.mm + core/rend/metal/metal_shaders.h + core/rend/metal/metal_shaders.mm + core/rend/metal/metal_texture.h + core/rend/metal/metal_texture.mm + core/rend/metal/metal_pipeline.h + core/rend/metal/metal_pipeline.mm + core/rend/metal/metal_buffer.h + core/rend/metal/metal_buffer.mm + core/rend/metal/metal_drawer.h + core/rend/metal/metal_drawer.mm + core/rend/metal/metal_quad.h + core/rend/metal/metal_quad.mm + core/rend/metal/metal_commandpool.h + core/rend/metal/metal_commandpool.mm + core/rend/metal/metal.h + core/rend/metal/metal_driver.h + core/deps/imgui/backends/imgui_impl_metal.h + core/deps/imgui/backends/imgui_impl_metal.mm) +endif() + if(WIN32 AND USE_DX9 AND NOT LIBRETRO AND NOT WINDOWS_STORE AND ("x86" IN_LIST ARCHITECTURE OR "x86_64" IN_LIST ARCHITECTURE)) set(REND_DX9_FILES core/rend/dx9/d3d_overlay.h diff --git a/core/cfg/option.h b/core/cfg/option.h index 9e8d8e6450..260247ba4b 100644 --- a/core/cfg/option.h +++ b/core/cfg/option.h @@ -415,6 +415,8 @@ class RendererOption : public Option { RenderType::DirectX11 #elif defined(USE_DX9) RenderType::DirectX9 +#elif defined(USE_METAL) + RenderType::Metal #elif !defined(USE_OPENGL) RenderType::Vulkan #else diff --git a/core/deps/imgui/backends/imgui_impl_metal.h b/core/deps/imgui/backends/imgui_impl_metal.h new file mode 100644 index 0000000000..d0debff0dc --- /dev/null +++ b/core/deps/imgui/backends/imgui_impl_metal.h @@ -0,0 +1,75 @@ +// dear imgui: Renderer Backend for Metal +// This needs to be used along with a Platform Backend (e.g. OSX) + +// Implemented features: +// [X] Renderer: User texture binding. 
Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID! +// [X] Renderer: Large meshes support (64k+ vertices) even with 16-bit indices (ImGuiBackendFlags_RendererHasVtxOffset). + +// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. +// Learn about Dear ImGui: +// - FAQ https://dearimgui.com/faq +// - Getting Started https://dearimgui.com/getting-started +// - Documentation https://dearimgui.com/docs (same as your local docs/ folder). +// - Introduction, links and more at the top of imgui.cpp + +#pragma once +#include "imgui.h" // IMGUI_IMPL_API +#ifndef IMGUI_DISABLE + +//----------------------------------------------------------------------------- +// ObjC API +//----------------------------------------------------------------------------- + +#ifdef __OBJC__ + +@class MTLRenderPassDescriptor; +@protocol MTLDevice, MTLCommandBuffer, MTLRenderCommandEncoder; + +// Follow "Getting Started" link and check examples/ folder to learn about using backends! 
+IMGUI_IMPL_API bool ImGui_ImplMetal_Init(id device);
+IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown();
+IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor);
+IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData,
+                                                   id commandBuffer,
+                                                   id commandEncoder);
+
+// Called by Init/NewFrame/Shutdown
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(id device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture();
+IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(id device);
+IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects();
+
+#endif
+
+//-----------------------------------------------------------------------------
+// C++ API
+//-----------------------------------------------------------------------------
+
+// Enable Metal C++ binding support with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file
+// More info about using Metal from C++: https://developer.apple.com/metal/cpp/
+
+#ifdef IMGUI_IMPL_METAL_CPP
+#include <Metal/Metal.hpp> // metal-cpp umbrella header; provides the MTL:: types used by the C++ overloads below
+#ifndef __OBJC__
+
+// Follow "Getting Started" link and check examples/ folder to learn about using backends!
+IMGUI_IMPL_API bool ImGui_ImplMetal_Init(MTL::Device* device); +IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown(); +IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor); +IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data, + MTL::CommandBuffer* commandBuffer, + MTL::RenderCommandEncoder* commandEncoder); + +// Called by Init/NewFrame/Shutdown +IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device); +IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture(); +IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device); +IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects(); + +#endif +#endif + +//----------------------------------------------------------------------------- + +#endif // #ifndef IMGUI_DISABLE \ No newline at end of file diff --git a/core/deps/imgui/backends/imgui_impl_metal.mm b/core/deps/imgui/backends/imgui_impl_metal.mm new file mode 100644 index 0000000000..810c696e9a --- /dev/null +++ b/core/deps/imgui/backends/imgui_impl_metal.mm @@ -0,0 +1,596 @@ +// dear imgui: Renderer Backend for Metal +// This needs to be used along with a Platform Backend (e.g. OSX) + +// Implemented features: +// [X] Renderer: User texture binding. Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID! +// [X] Renderer: Large meshes support (64k+ vertices) even with 16-bit indices (ImGuiBackendFlags_RendererHasVtxOffset). + +// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. +// Learn about Dear ImGui: +// - FAQ https://dearimgui.com/faq +// - Getting Started https://dearimgui.com/getting-started +// - Documentation https://dearimgui.com/docs (same as your local docs/ folder). 
+// - Introduction, links and more at the top of imgui.cpp + +// CHANGELOG +// (minor and older changes stripped away, please see git history for details) +// 2025-02-03: Metal: Crash fix. (#8367) +// 2024-01-08: Metal: Fixed memory leaks when using metal-cpp (#8276, #8166) or when using multiple contexts (#7419). +// 2022-08-23: Metal: Update deprecated property 'sampleCount'->'rasterSampleCount'. +// 2022-07-05: Metal: Add dispatch synchronization. +// 2022-06-30: Metal: Use __bridge for ARC based systems. +// 2022-06-01: Metal: Fixed null dereference on exit inside command buffer completion handler. +// 2022-04-27: Misc: Store backend data in a per-context struct, allowing to use this backend with multiple contexts. +// 2022-01-03: Metal: Ignore ImDrawCmd where ElemCount == 0 (very rare but can technically be manufactured by user code). +// 2021-12-30: Metal: Added Metal C++ support. Enable with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file. +// 2021-08-24: Metal: Fixed a crash when clipping rect larger than framebuffer is submitted. (#4464) +// 2021-05-19: Metal: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). (will become a requirement) +// 2021-02-18: Metal: Change blending equation to preserve alpha in output buffer. +// 2021-01-25: Metal: Fixed texture storage mode when building on Mac Catalyst. +// 2019-05-29: Metal: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. +// 2019-04-30: Metal: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. +// 2019-02-11: Metal: Projecting clipping rectangles correctly using draw_data->FramebufferScale to allow multi-viewports for retina display. +// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. +// 2018-07-05: Metal: Added new Metal backend implementation. 
+
+#include "imgui.h"
+#ifndef IMGUI_DISABLE
+#include "imgui_impl_metal.h"
+#import <time.h>        // clock_gettime_nsec_np(), CLOCK_UPTIME_RAW
+#import <Metal/Metal.h> // MTLBuffer, MTLPixelFormat, MTLRenderPassDescriptor, ...
+
+#pragma mark - Support classes
+
+// A wrapper around a MTLBuffer object that knows the last time it was reused
+@interface MetalBuffer : NSObject
+@property (nonatomic, strong) id<MTLBuffer> buffer; // protocol-qualified so that .contents / .length resolve at the use sites
+@property (nonatomic, assign) double lastReuseTime; // seconds, as returned by GetMachAbsoluteTimeInSeconds()
+- (instancetype)initWithBuffer:(id)buffer;
+@end
+
+// An object that encapsulates the data necessary to uniquely identify a
+// render pipeline state. These are used as cache keys.
+@interface FramebufferDescriptor : NSObject<NSCopying> // NSCopying: instances are used as NSMutableDictionary keys
+@property (nonatomic, assign) unsigned long sampleCount;
+@property (nonatomic, assign) MTLPixelFormat colorPixelFormat;
+@property (nonatomic, assign) MTLPixelFormat depthPixelFormat;
+@property (nonatomic, assign) MTLPixelFormat stencilPixelFormat;
+- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor;
+@end
+
+// A singleton that stores long-lived objects that are needed by the Metal
+// renderer backend. Stores the render pipeline state cache and the default
+// font texture, and manages the reusable buffer cache.
+@interface MetalContext : NSObject +@property (nonatomic, strong) id device; +@property (nonatomic, strong) id depthStencilState; +@property (nonatomic, strong) FramebufferDescriptor* framebufferDescriptor; // framebuffer descriptor for current frame; transient +@property (nonatomic, strong) NSMutableDictionary* renderPipelineStateCache; // pipeline cache; keyed on framebuffer descriptors +@property (nonatomic, strong, nullable) id fontTexture; +@property (nonatomic, strong) NSMutableArray* bufferCache; +@property (nonatomic, assign) double lastBufferCachePurge; +- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id)device; +- (id)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id)device; +@end + +struct ImGui_ImplMetal_Data +{ + MetalContext* SharedMetalContext; + + ImGui_ImplMetal_Data() { memset((void*)this, 0, sizeof(*this)); } +}; + +static ImGui_ImplMetal_Data* ImGui_ImplMetal_GetBackendData() { return ImGui::GetCurrentContext() ? 
(ImGui_ImplMetal_Data*)ImGui::GetIO().BackendRendererUserData : nullptr; } +static void ImGui_ImplMetal_DestroyBackendData(){ IM_DELETE(ImGui_ImplMetal_GetBackendData()); } + +static inline CFTimeInterval GetMachAbsoluteTimeInSeconds() { return (CFTimeInterval)(double)(clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / 1e9); } + +#ifdef IMGUI_IMPL_METAL_CPP + +#pragma mark - Dear ImGui Metal C++ Backend API + +bool ImGui_ImplMetal_Init(MTL::Device* device) +{ + return ImGui_ImplMetal_Init((__bridge id)(device)); +} + +void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor) +{ + ImGui_ImplMetal_NewFrame((__bridge MTLRenderPassDescriptor*)(renderPassDescriptor)); +} + +void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data, + MTL::CommandBuffer* commandBuffer, + MTL::RenderCommandEncoder* commandEncoder) +{ + ImGui_ImplMetal_RenderDrawData(draw_data, + (__bridge id)(commandBuffer), + (__bridge id)(commandEncoder)); + +} + +bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device) +{ + return ImGui_ImplMetal_CreateFontsTexture((__bridge id)(device)); +} + +bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device) +{ + return ImGui_ImplMetal_CreateDeviceObjects((__bridge id)(device)); +} + +#endif // #ifdef IMGUI_IMPL_METAL_CPP + +#pragma mark - Dear ImGui Metal Backend API + +bool ImGui_ImplMetal_Init(id device) +{ + ImGuiIO& io = ImGui::GetIO(); + IMGUI_CHECKVERSION(); + IM_ASSERT(io.BackendRendererUserData == nullptr && "Already initialized a renderer backend!"); + + ImGui_ImplMetal_Data* bd = IM_NEW(ImGui_ImplMetal_Data)(); + io.BackendRendererUserData = (void*)bd; + io.BackendRendererName = "imgui_impl_metal"; + io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. 
+ + bd->SharedMetalContext = [[MetalContext alloc] init]; + bd->SharedMetalContext.device = device; + + return true; +} + +void ImGui_ImplMetal_Shutdown() +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + IM_UNUSED(bd); + IM_ASSERT(bd != nullptr && "No renderer backend to shutdown, or already shutdown?"); + ImGui_ImplMetal_DestroyDeviceObjects(); + ImGui_ImplMetal_DestroyBackendData(); + + ImGuiIO& io = ImGui::GetIO(); + io.BackendRendererName = nullptr; + io.BackendRendererUserData = nullptr; + io.BackendFlags &= ~ImGuiBackendFlags_RendererHasVtxOffset; +} + +void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor) +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + IM_ASSERT(bd != nil && "Context or backend not initialized! Did you call ImGui_ImplMetal_Init()?"); +#ifdef IMGUI_IMPL_METAL_CPP + bd->SharedMetalContext.framebufferDescriptor = [[[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor]autorelease]; +#else + bd->SharedMetalContext.framebufferDescriptor = [[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor]; +#endif + if (bd->SharedMetalContext.depthStencilState == nil) + ImGui_ImplMetal_CreateDeviceObjects(bd->SharedMetalContext.device); +} + +static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, id commandBuffer, + id commandEncoder, id renderPipelineState, + MetalBuffer* vertexBuffer, size_t vertexBufferOffset) +{ + IM_UNUSED(commandBuffer); + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + [commandEncoder setCullMode:MTLCullModeNone]; + [commandEncoder setDepthStencilState:bd->SharedMetalContext.depthStencilState]; + + // Setup viewport, orthographic projection matrix + // Our visible imgui space lies from draw_data->DisplayPos (top left) to + // draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin is typically (0,0) for single viewport apps. 
+ MTLViewport viewport = + { + .originX = 0.0, + .originY = 0.0, + .width = (double)(drawData->DisplaySize.x * drawData->FramebufferScale.x), + .height = (double)(drawData->DisplaySize.y * drawData->FramebufferScale.y), + .znear = 0.0, + .zfar = 1.0 + }; + [commandEncoder setViewport:viewport]; + + float L = drawData->DisplayPos.x; + float R = drawData->DisplayPos.x + drawData->DisplaySize.x; + float T = drawData->DisplayPos.y; + float B = drawData->DisplayPos.y + drawData->DisplaySize.y; + float N = (float)viewport.znear; + float F = (float)viewport.zfar; + const float ortho_projection[4][4] = + { + { 2.0f/(R-L), 0.0f, 0.0f, 0.0f }, + { 0.0f, 2.0f/(T-B), 0.0f, 0.0f }, + { 0.0f, 0.0f, 1/(F-N), 0.0f }, + { (R+L)/(L-R), (T+B)/(B-T), N/(F-N), 1.0f }, + }; + [commandEncoder setVertexBytes:&ortho_projection length:sizeof(ortho_projection) atIndex:1]; + + [commandEncoder setRenderPipelineState:renderPipelineState]; + + [commandEncoder setVertexBuffer:vertexBuffer.buffer offset:0 atIndex:0]; + [commandEncoder setVertexBufferOffset:vertexBufferOffset atIndex:0]; +} + +// Metal Render function. +void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData, id commandBuffer, id commandEncoder) +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + MetalContext* ctx = bd->SharedMetalContext; + + // Avoid rendering when minimized, scale coordinates for retina displays (screen coordinates != framebuffer coordinates) + int fb_width = (int)(drawData->DisplaySize.x * drawData->FramebufferScale.x); + int fb_height = (int)(drawData->DisplaySize.y * drawData->FramebufferScale.y); + if (fb_width <= 0 || fb_height <= 0 || drawData->CmdListsCount == 0) + return; + + // Try to retrieve a render pipeline state that is compatible with the framebuffer config for this frame + // The hit rate for this cache should be very near 100%. 
+ id renderPipelineState = ctx.renderPipelineStateCache[ctx.framebufferDescriptor]; + if (renderPipelineState == nil) + { + // No luck; make a new render pipeline state + renderPipelineState = [ctx renderPipelineStateForFramebufferDescriptor:ctx.framebufferDescriptor device:commandBuffer.device]; + + // Cache render pipeline state for later reuse + ctx.renderPipelineStateCache[ctx.framebufferDescriptor] = renderPipelineState; + } + + size_t vertexBufferLength = (size_t)drawData->TotalVtxCount * sizeof(ImDrawVert); + size_t indexBufferLength = (size_t)drawData->TotalIdxCount * sizeof(ImDrawIdx); + MetalBuffer* vertexBuffer = [ctx dequeueReusableBufferOfLength:vertexBufferLength device:commandBuffer.device]; + MetalBuffer* indexBuffer = [ctx dequeueReusableBufferOfLength:indexBufferLength device:commandBuffer.device]; + + ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, 0); + + // Will project scissor/clipping rectangles into framebuffer space + ImVec2 clip_off = drawData->DisplayPos; // (0,0) unless using multi-viewports + ImVec2 clip_scale = drawData->FramebufferScale; // (1,1) unless using retina display which are often (2,2) + + // Render command lists + size_t vertexBufferOffset = 0; + size_t indexBufferOffset = 0; + for (int n = 0; n < drawData->CmdListsCount; n++) + { + const ImDrawList* draw_list = drawData->CmdLists[n]; + + memcpy((char*)vertexBuffer.buffer.contents + vertexBufferOffset, draw_list->VtxBuffer.Data, (size_t)draw_list->VtxBuffer.Size * sizeof(ImDrawVert)); + memcpy((char*)indexBuffer.buffer.contents + indexBufferOffset, draw_list->IdxBuffer.Data, (size_t)draw_list->IdxBuffer.Size * sizeof(ImDrawIdx)); + + for (int cmd_i = 0; cmd_i < draw_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &draw_list->CmdBuffer[cmd_i]; + if (pcmd->UserCallback) + { + // User callback, registered via ImDrawList::AddCallback() + // (ImDrawCallback_ResetRenderState is a special callback value used 
by the user to request the renderer to reset render state.) + if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) + ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, vertexBufferOffset); + else + pcmd->UserCallback(draw_list, pcmd); + } + else + { + // Project scissor/clipping rectangles into framebuffer space + ImVec2 clip_min((pcmd->ClipRect.x - clip_off.x) * clip_scale.x, (pcmd->ClipRect.y - clip_off.y) * clip_scale.y); + ImVec2 clip_max((pcmd->ClipRect.z - clip_off.x) * clip_scale.x, (pcmd->ClipRect.w - clip_off.y) * clip_scale.y); + + // Clamp to viewport as setScissorRect() won't accept values that are off bounds + if (clip_min.x < 0.0f) { clip_min.x = 0.0f; } + if (clip_min.y < 0.0f) { clip_min.y = 0.0f; } + if (clip_max.x > fb_width) { clip_max.x = (float)fb_width; } + if (clip_max.y > fb_height) { clip_max.y = (float)fb_height; } + if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y) + continue; + if (pcmd->ElemCount == 0) // drawIndexedPrimitives() validation doesn't accept this + continue; + + // Apply scissor/clipping rectangle + MTLScissorRect scissorRect = + { + .x = NSUInteger(clip_min.x), + .y = NSUInteger(clip_min.y), + .width = NSUInteger(clip_max.x - clip_min.x), + .height = NSUInteger(clip_max.y - clip_min.y) + }; + [commandEncoder setScissorRect:scissorRect]; + + // Bind texture, Draw + if (ImTextureID tex_id = pcmd->GetTexID()) + [commandEncoder setFragmentTexture:(__bridge id)(void*)(intptr_t)(tex_id) atIndex:0]; + + [commandEncoder setVertexBufferOffset:(vertexBufferOffset + pcmd->VtxOffset * sizeof(ImDrawVert)) atIndex:0]; + [commandEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:pcmd->ElemCount + indexType:sizeof(ImDrawIdx) == 2 ? 
MTLIndexTypeUInt16 : MTLIndexTypeUInt32 + indexBuffer:indexBuffer.buffer + indexBufferOffset:indexBufferOffset + pcmd->IdxOffset * sizeof(ImDrawIdx)]; + } + } + + vertexBufferOffset += (size_t)draw_list->VtxBuffer.Size * sizeof(ImDrawVert); + indexBufferOffset += (size_t)draw_list->IdxBuffer.Size * sizeof(ImDrawIdx); + } + + MetalContext* sharedMetalContext = bd->SharedMetalContext; + [commandBuffer addCompletedHandler:^(id) + { + dispatch_async(dispatch_get_main_queue(), ^{ + @synchronized(sharedMetalContext.bufferCache) + { + [sharedMetalContext.bufferCache addObject:vertexBuffer]; + [sharedMetalContext.bufferCache addObject:indexBuffer]; + } + }); + }]; +} + +bool ImGui_ImplMetal_CreateFontsTexture(id device) +{ ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + ImGuiIO& io = ImGui::GetIO(); + + // We are retrieving and uploading the font atlas as a 4-channels RGBA texture here. + // In theory we could call GetTexDataAsAlpha8() and upload a 1-channel texture to save on memory access bandwidth. + // However, using a shader designed for 1-channel texture would make it less obvious to use the ImTextureID facility to render users own textures. + // You can make that change in your implementation. 
+ unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + MTLTextureDescriptor* textureDescriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:(NSUInteger)width + height:(NSUInteger)height + mipmapped:NO]; + textureDescriptor.usage = MTLTextureUsageShaderRead; +#if TARGET_OS_OSX || TARGET_OS_MACCATALYST + textureDescriptor.storageMode = MTLStorageModeManaged; +#else + textureDescriptor.storageMode = MTLStorageModeShared; +#endif + id texture = [device newTextureWithDescriptor:textureDescriptor]; + [texture replaceRegion:MTLRegionMake2D(0, 0, (NSUInteger)width, (NSUInteger)height) mipmapLevel:0 withBytes:pixels bytesPerRow:(NSUInteger)width * 4]; + bd->SharedMetalContext.fontTexture = texture; + io.Fonts->SetTexID((ImTextureID)(intptr_t)(__bridge void*)bd->SharedMetalContext.fontTexture); // ImTextureID == ImU64 + + return (bd->SharedMetalContext.fontTexture != nil); +} + +void ImGui_ImplMetal_DestroyFontsTexture() +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + ImGuiIO& io = ImGui::GetIO(); + bd->SharedMetalContext.fontTexture = nil; + io.Fonts->SetTexID(0); +} + +bool ImGui_ImplMetal_CreateDeviceObjects(id device) +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + MTLDepthStencilDescriptor* depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init]; + depthStencilDescriptor.depthWriteEnabled = NO; + depthStencilDescriptor.depthCompareFunction = MTLCompareFunctionAlways; + bd->SharedMetalContext.depthStencilState = [device newDepthStencilStateWithDescriptor:depthStencilDescriptor]; +#ifdef IMGUI_IMPL_METAL_CPP + [depthStencilDescriptor release]; +#endif + ImGui_ImplMetal_CreateFontsTexture(device); + return true; +} + +void ImGui_ImplMetal_DestroyDeviceObjects() +{ + ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData(); + ImGui_ImplMetal_DestroyFontsTexture(); + [bd->SharedMetalContext.renderPipelineStateCache 
removeAllObjects]; +} + +#pragma mark - MetalBuffer implementation + +@implementation MetalBuffer +- (instancetype)initWithBuffer:(id)buffer +{ + if ((self = [super init])) + { + _buffer = buffer; + _lastReuseTime = GetMachAbsoluteTimeInSeconds(); + } + return self; +} +@end + +#pragma mark - FramebufferDescriptor implementation + +@implementation FramebufferDescriptor +- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor +{ + if ((self = [super init])) + { + _sampleCount = renderPassDescriptor.colorAttachments[0].texture.sampleCount; + _colorPixelFormat = renderPassDescriptor.colorAttachments[0].texture.pixelFormat; + _depthPixelFormat = renderPassDescriptor.depthAttachment.texture.pixelFormat; + _stencilPixelFormat = renderPassDescriptor.stencilAttachment.texture.pixelFormat; + } + return self; +} + +- (nonnull id)copyWithZone:(nullable NSZone*)zone +{ + FramebufferDescriptor* copy = [[FramebufferDescriptor allocWithZone:zone] init]; + copy.sampleCount = self.sampleCount; + copy.colorPixelFormat = self.colorPixelFormat; + copy.depthPixelFormat = self.depthPixelFormat; + copy.stencilPixelFormat = self.stencilPixelFormat; + return copy; +} + +- (NSUInteger)hash +{ + NSUInteger sc = _sampleCount & 0x3; + NSUInteger cf = _colorPixelFormat & 0x3FF; + NSUInteger df = _depthPixelFormat & 0x3FF; + NSUInteger sf = _stencilPixelFormat & 0x3FF; + NSUInteger hash = (sf << 22) | (df << 12) | (cf << 2) | sc; + return hash; +} + +- (BOOL)isEqual:(id)object +{ + FramebufferDescriptor* other = object; + if (![other isKindOfClass:[FramebufferDescriptor class]]) + return NO; + return other.sampleCount == self.sampleCount && + other.colorPixelFormat == self.colorPixelFormat && + other.depthPixelFormat == self.depthPixelFormat && + other.stencilPixelFormat == self.stencilPixelFormat; +} + +@end + +#pragma mark - MetalContext implementation + +@implementation MetalContext +- (instancetype)init +{ + if ((self = [super init])) + { + 
self.renderPipelineStateCache = [NSMutableDictionary dictionary];
+        self.bufferCache = [NSMutableArray array];
+        _lastBufferCachePurge = GetMachAbsoluteTimeInSeconds();
+    }
+    return self;
+}
+
+- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id)device // Returns a cached buffer of at least 'length' bytes (removed from the cache), or a newly created shared-storage buffer on 'device'.
+{
+    double now = GetMachAbsoluteTimeInSeconds(); // Keep fractional seconds: both timestamps compared below are doubles, and an integer 'now' made a just-reused buffer look stale.
+
+    @synchronized(self.bufferCache)
+    {
+        // Purge old buffers that haven't been useful for a while
+        if (now - self.lastBufferCachePurge > 1.0)
+        {
+            NSMutableArray* survivors = [NSMutableArray array];
+            for (MetalBuffer* candidate in self.bufferCache)
+                if (candidate.lastReuseTime > self.lastBufferCachePurge)
+                    [survivors addObject:candidate];
+            self.bufferCache = survivors; // Already a fresh mutable array; an extra mutableCopy is redundant and leaks without ARC.
+            self.lastBufferCachePurge = now;
+        }
+
+        // See if we have a buffer we can reuse
+        MetalBuffer* bestCandidate = nil;
+        for (MetalBuffer* candidate in self.bufferCache)
+            if (candidate.buffer.length >= length && (bestCandidate == nil || bestCandidate.lastReuseTime > candidate.lastReuseTime)) // large enough, and the least recently reused so far
+                bestCandidate = candidate;
+
+        if (bestCandidate != nil)
+        {
+            [self.bufferCache removeObject:bestCandidate];
+            bestCandidate.lastReuseTime = now;
+            return bestCandidate;
+        }
+    }
+
+    // No luck; make a new buffer
+    id backing = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
+    return [[MetalBuffer alloc] initWithBuffer:backing];
+}
+
+// Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling.
+- (id)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id)device +{ + NSError* error = nil; + + NSString* shaderSource = @"" + "#include \n" + "using namespace metal;\n" + "\n" + "struct Uniforms {\n" + " float4x4 projectionMatrix;\n" + "};\n" + "\n" + "struct VertexIn {\n" + " float2 position [[attribute(0)]];\n" + " float2 texCoords [[attribute(1)]];\n" + " uchar4 color [[attribute(2)]];\n" + "};\n" + "\n" + "struct VertexOut {\n" + " float4 position [[position]];\n" + " float2 texCoords;\n" + " float4 color;\n" + "};\n" + "\n" + "vertex VertexOut vertex_main(VertexIn in [[stage_in]],\n" + " constant Uniforms &uniforms [[buffer(1)]]) {\n" + " VertexOut out;\n" + " out.position = uniforms.projectionMatrix * float4(in.position, 0, 1);\n" + " out.texCoords = in.texCoords;\n" + " out.color = float4(in.color) / float4(255.0);\n" + " return out;\n" + "}\n" + "\n" + "fragment half4 fragment_main(VertexOut in [[stage_in]],\n" + " texture2d texture [[texture(0)]]) {\n" + " constexpr sampler linearSampler(coord::normalized, address::clamp_to_border, min_filter::linear, mag_filter::linear, mip_filter::linear);\n" + " half4 texColor = texture.sample(linearSampler, in.texCoords);\n" + " return half4(in.color) * texColor;\n" + "}\n"; + + id library = [device newLibraryWithSource:shaderSource options:nil error:&error]; + if (library == nil) + { + NSLog(@"Error: failed to create Metal library: %@", error); + return nil; + } + + id vertexFunction = [library newFunctionWithName:@"vertex_main"]; + id fragmentFunction = [library newFunctionWithName:@"fragment_main"]; + + if (vertexFunction == nil || fragmentFunction == nil) + { + NSLog(@"Error: failed to find Metal shader functions in library: %@", error); + return nil; + } + + MTLVertexDescriptor* vertexDescriptor = [MTLVertexDescriptor vertexDescriptor]; + vertexDescriptor.attributes[0].offset = offsetof(ImDrawVert, pos); + vertexDescriptor.attributes[0].format = MTLVertexFormatFloat2; // 
position + vertexDescriptor.attributes[0].bufferIndex = 0; + vertexDescriptor.attributes[1].offset = offsetof(ImDrawVert, uv); + vertexDescriptor.attributes[1].format = MTLVertexFormatFloat2; // texCoords + vertexDescriptor.attributes[1].bufferIndex = 0; + vertexDescriptor.attributes[2].offset = offsetof(ImDrawVert, col); + vertexDescriptor.attributes[2].format = MTLVertexFormatUChar4; // color + vertexDescriptor.attributes[2].bufferIndex = 0; + vertexDescriptor.layouts[0].stepRate = 1; + vertexDescriptor.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex; + vertexDescriptor.layouts[0].stride = sizeof(ImDrawVert); + + MTLRenderPipelineDescriptor* pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; + pipelineDescriptor.vertexFunction = vertexFunction; + pipelineDescriptor.fragmentFunction = fragmentFunction; + pipelineDescriptor.vertexDescriptor = vertexDescriptor; + pipelineDescriptor.rasterSampleCount = self.framebufferDescriptor.sampleCount; + pipelineDescriptor.colorAttachments[0].pixelFormat = self.framebufferDescriptor.colorPixelFormat; + pipelineDescriptor.colorAttachments[0].blendingEnabled = YES; + pipelineDescriptor.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd; + pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; + pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha; + pipelineDescriptor.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd; + pipelineDescriptor.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne; + pipelineDescriptor.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha; + pipelineDescriptor.depthAttachmentPixelFormat = self.framebufferDescriptor.depthPixelFormat; + pipelineDescriptor.stencilAttachmentPixelFormat = self.framebufferDescriptor.stencilPixelFormat; + + id renderPipelineState = [device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error]; + 
if (error != nil) + NSLog(@"Error: failed to create Metal pipeline state: %@", error); + + return renderPipelineState; +} + +@end + +//----------------------------------------------------------------------------- + +#endif // #ifndef IMGUI_DISABLE \ No newline at end of file diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 7f669f3fd0..eb3afd4c0b 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -268,6 +268,7 @@ Renderer* rend_norend(); Renderer* rend_Vulkan(); Renderer* rend_OITVulkan(); Renderer* rend_DirectX9(); +Renderer* rend_Metal(); Renderer* rend_DirectX11(); Renderer* rend_OITDirectX11(); @@ -302,6 +303,11 @@ static void rend_create_renderer() renderer = rend_DirectX9(); break; #endif +#ifdef USE_METAL + case RenderType::Metal: + renderer = rend_Metal(); + break; +#endif #ifdef USE_DX11 case RenderType::DirectX11: renderer = rend_DirectX11(); diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 7b4c852027..01c19661a1 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -1162,7 +1162,7 @@ static void sendLights() return; state.lightModelUpdated = false; - N2LightModel model; + N2LightModel model {}; model.lightCount = 0; if (curLightModel == nullptr) { diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index 02faa6fabc..fa33d9838c 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -151,7 +151,7 @@ struct VO_BORDER_COL_type : RGBAColorTemplate }; }; -struct RGBColor : RGBAColorTemplate +struct RGB_Color : RGBAColorTemplate { union { struct @@ -165,7 +165,7 @@ struct RGBColor : RGBAColorTemplate }; }; -struct RGBAColor : RGBAColorTemplate +struct RGBA_Color : RGBAColorTemplate { union { struct @@ -531,11 +531,11 @@ union FOG_DENSITY_type #define SDRAM_ARB_CFG PvrReg(SDRAM_ARB_CFG_addr,u32) // RW Texture memory arbiter control #define SDRAM_CFG PvrReg(SDRAM_CFG_addr,u32) // RW Texture memory control -#define FOG_COL_RAM PvrReg(FOG_COL_RAM_addr, RGBColor) // 
RW Color for Look Up table Fog -#define FOG_COL_VERT PvrReg(FOG_COL_VERT_addr, RGBColor) // RW Color for vertex Fog +#define FOG_COL_RAM PvrReg(FOG_COL_RAM_addr, RGB_Color) // RW Color for Look Up table Fog +#define FOG_COL_VERT PvrReg(FOG_COL_VERT_addr, RGB_Color) // RW Color for vertex Fog #define FOG_DENSITY PvrReg(FOG_DENSITY_addr, FOG_DENSITY_type) // RW Fog scale value -#define FOG_CLAMP_MAX PvrReg(FOG_CLAMP_MAX_addr, RGBAColor) // RW Color clamping maximum value -#define FOG_CLAMP_MIN PvrReg(FOG_CLAMP_MIN_addr, RGBAColor) // RW Color clamping minimum value +#define FOG_CLAMP_MAX PvrReg(FOG_CLAMP_MAX_addr, RGBA_Color) // RW Color clamping maximum value +#define FOG_CLAMP_MIN PvrReg(FOG_CLAMP_MIN_addr, RGBA_Color) // RW Color clamping minimum value #define SPG_TRIGGER_POS PvrReg(SPG_TRIGGER_POS_addr,u32) // RW External trigger signal HV counter value #define SPG_HBLANK_INT PvrReg(SPG_HBLANK_INT_addr,SPG_HBLANK_INT_type) // RW H-blank interrupt control #define SPG_VBLANK_INT PvrReg(SPG_VBLANK_INT_addr,SPG_VBLANK_INT_type) // RW V-blank interrupt control diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index b12df6e1b4..6507d2ff9d 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -244,8 +244,8 @@ struct rend_context u32 framebufferWidth; u32 framebufferHeight; - RGBAColor fog_clamp_min; - RGBAColor fog_clamp_max; + RGBA_Color fog_clamp_min; + RGBA_Color fog_clamp_max; std::vector verts; std::vector idx; diff --git a/core/rend/metal/metal.h b/core/rend/metal/metal.h new file mode 100644 index 0000000000..0223cdd55b --- /dev/null +++ b/core/rend/metal/metal.h @@ -0,0 +1,52 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "types.h"
+
+#include <type_traits>
+#include <utility>
+
+// Polymorphic base for anything that is destroyed through a base pointer
+// once it is no longer needed.
+class MetalDeletable
+{
+public:
+    virtual ~MetalDeletable() = default;
+};
+
+// Interface of an object that accepts MetalDeletable instances and
+// destroys them later.
+class MetalFlightManager
+{
+public:
+    // Hands `object` over to the manager.
+    virtual void addToFlight(MetalDeletable *object) = 0;
+
+    virtual ~MetalFlightManager() = default;
+};
+
+// Wraps a value of type T as a MetalDeletable. When T is a raw pointer type
+// the pointee is deleted together with the wrapper; for any other T the
+// wrapped value is simply destroyed.
+template <typename T>
+class MetalDeleter : public MetalDeletable
+{
+public:
+    MetalDeleter() = delete;
+    // Stores a copy of `o`.
+    explicit MetalDeleter(T& o) : o(o) {}
+    // Moves `o` into the wrapper.
+    MetalDeleter(T&& o) : o(std::move(o)) {}
+    // A copy would hold the same raw pointer and delete it a second time.
+    MetalDeleter(const MetalDeleter&) = delete;
+    MetalDeleter& operator=(const MetalDeleter&) = delete;
+    ~MetalDeleter() override {
+        if constexpr (std::is_pointer_v<T>)
+            delete o;
+    }
+
+private:
+    T o;
+};
\ No newline at end of file
diff --git a/core/rend/metal/metal_buffer.h b/core/rend/metal/metal_buffer.h
new file mode 100644
index 0000000000..3e0a864bd9
--- /dev/null
+++ b/core/rend/metal/metal_buffer.h
@@ -0,0 +1,119 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#include "types.h"
+// NOTE(review): the two include targets below were lost in extraction; they are
+// reconstructed from what this header uses (Metal buffer API, std::vector).
+#include <Metal/Metal.h>
+#include <vector>
+#include <cstring>
+
+// A Metal buffer of fixed size, created in shared storage mode (see
+// metal_buffer.mm), with bounds-checked copy helpers.
+struct MetalBufferData
+{
+    MetalBufferData(u64 size);
+    // The destructor marks the buffer contents as discardable, so a copy of this
+    // struct would invalidate the buffer still used by the original.
+    MetalBufferData(const MetalBufferData&) = delete;
+    MetalBufferData& operator=(const MetalBufferData&) = delete;
+    ~MetalBufferData()
+    {
+        [buffer setPurgeableState:MTLPurgeableStateEmpty];
+        buffer = nil;
+    }
+
+    // Copies `size` bytes from `data` into the buffer at byte offset `bufOffset`.
+    void upload(u32 size, const void *data, u32 bufOffset = 0) const
+    {
+        // Widen before adding: the sum of two u32 values can wrap around.
+        verify((u64)bufOffset + size <= bufferSize);
+
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        memcpy(dataPtr, data, size);
+    }
+
+    // Copies `count` chunks back to back, starting at byte offset `bufOffset`.
+    // A null entry in `data` leaves a gap of sizes[i] bytes untouched.
+    void upload(size_t count, const u32 *sizes, const void * const *data, u32 bufOffset = 0) const
+    {
+        u64 totalSize = 0;
+        for (size_t i = 0; i < count; ++i)
+            totalSize += sizes[i];
+        verify((u64)bufOffset + totalSize <= bufferSize);
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        for (size_t i = 0; i < count; ++i)
+        {
+            if (data[i] != nullptr)
+                memcpy(dataPtr, data[i], sizes[i]);
+            dataPtr = (u8 *)dataPtr + sizes[i];
+        }
+    }
+
+    // Copies `size` bytes from the buffer at byte offset `bufOffset` into `data`.
+    void download(u32 size, void *data, u32 bufOffset = 0) const
+    {
+        verify((u64)bufOffset + size <= bufferSize);
+
+        void* dataPtr = (u8 *)[buffer contents] + bufOffset;
+        memcpy(data, dataPtr, size);
+    }
+
+    id<MTLBuffer> buffer;
+    u64 bufferSize;
+};
+
+// Collects memory chunks, inserting padding so that every chunk starts on a
+// 16-byte boundary, and uploads them to a MetalBufferData in one call.
+// Only the pointers are stored: the chunks must stay valid until upload().
+class MetalBufferPacker
+{
+public:
+    MetalBufferPacker();
+
+    u64 addUniform(const void *p, size_t size) {
+        return add(p, size);
+    }
+
+    u64 addStorage(const void *p, size_t size) {
+        return add(p, size);
+    }
+
+    // Appends a chunk and returns its offset from the start of the packed data.
+    u64 add(const void *p, size_t size)
+    {
+        u32 padding = align(offset, 16);
+        if (padding != 0)
+        {
+            // A null chunk is skipped by MetalBufferData::upload, leaving a gap.
+            chunks.push_back(nullptr);
+            chunkSizes.push_back(padding);
+            offset += padding;
+        }
+        u64 start = offset;
+        chunks.push_back(p);
+        chunkSizes.push_back((u32)size);
+        offset += size;
+
+        return start;
+    }
+
+    // Writes all collected chunks into `bufferData` starting at `bufOffset`.
+    void upload(MetalBufferData& bufferData, u32 bufOffset = 0)
+    {
+        if (!chunks.empty())
+            bufferData.upload(chunks.size(), chunkSizes.data(), chunks.data(), bufOffset);
+    }
+
+    // Total number of bytes collected so far, padding included.
+    u64 size() const {
+        return offset;
+    }
+
+    // Number of bytes needed to round `offset` up to a multiple of `alignment`
+    // (`alignment` must be a power of two for the mask below to work).
+    static inline u32 align(u64 offset, u32 alignment)
+    {
+        u32 pad = (u32)(offset & (alignment - 1));
+        return pad == 0 ? 0 : alignment - pad;
+    }
+
+private:
+    std::vector<const void *> chunks;
+    std::vector<u32> chunkSizes;
+    u64 offset = 0;
+};
\ No newline at end of file
diff --git a/core/rend/metal/metal_buffer.mm b/core/rend/metal/metal_buffer.mm
new file mode 100644
index 0000000000..3f207f632c
--- /dev/null
+++ b/core/rend/metal/metal_buffer.mm
@@ -0,0 +1,29 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "metal_buffer.h"
+#include "metal_context.h"
+
+MetalBufferData::MetalBufferData(u64 size) : bufferSize(size) {
+    auto device = MetalContext::Instance()->GetDevice();
+
+    buffer = [device newBufferWithLength:size options:MTLResourceStorageModeShared];
+}
+
+MetalBufferPacker::MetalBufferPacker() {}
diff --git a/core/rend/metal/metal_commandpool.h b/core/rend/metal/metal_commandpool.h
new file mode 100644
index 0000000000..94513a4199
--- /dev/null
+++ b/core/rend/metal/metal_commandpool.h
@@ -0,0 +1,53 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#include "metal.h"
+
+// NOTE(review): the include targets below were lost in extraction; they are
+// reconstructed from what this header uses.
+#include <Metal/Metal.h>
+#include <memory>
+#include <vector>
+
+// Ring of `chainSize` frame slots. Each slot holds one command buffer, the
+// shared event signaled when that buffer completes, and the objects that must
+// stay alive until then.
+class MetalCommandPool : public MetalFlightManager
+{
+public:
+    void Init(size_t chainSize = 3);
+    void Term();
+    // Advances to the next slot, waiting for its previous work to complete.
+    void BeginFrame();
+    // Commits the current slot's command buffer.
+    void EndFrame();
+    // Same as EndFrame(), then waits for every slot to complete.
+    void EndFrameAndWait();
+    // Returns the command buffer of the current slot, creating it on first use.
+    id<MTLCommandBuffer> Allocate();
+
+    int GetIndex() const {
+        return index;
+    }
+
+    // Takes ownership of `object`; it is destroyed the next time the current
+    // slot is recycled (or when the pool is terminated).
+    void addToFlight(MetalDeletable *object) override {
+        inFlightObjects[index].emplace_back(object);
+    }
+
+private:
+    int index = 0;
+    std::vector<id<MTLCommandBuffer>> inFlightBuffers;
+    std::vector<id<MTLSharedEvent>> events;
+    size_t chainSize = 0;
+    std::vector<std::vector<std::unique_ptr<MetalDeletable>>> inFlightObjects;
+    bool frameStarted = false;
+    id<MTLCommandQueue> queue = nil;
+    id<MTLDevice> device = nil;
+};
diff --git a/core/rend/metal/metal_commandpool.mm b/core/rend/metal/metal_commandpool.mm
new file mode 100644
index 0000000000..72008b651c
--- /dev/null
+++ b/core/rend/metal/metal_commandpool.mm
@@ -0,0 +1,98 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "metal_commandpool.h"
+#import "metal_context.h"
+
+// Creates the command queue and sizes the per-slot containers.
+void MetalCommandPool::Init(size_t chainSize)
+{
+    // BeginFrame() computes `index % chainSize`, so an empty chain is invalid.
+    verify(chainSize > 0);
+    this->chainSize = chainSize;
+    device = MetalContext::Instance()->GetDevice();
+
+    queue = [device newCommandQueue];
+
+    INFO_LOG(RENDERER, "MetalCommandPool::Init chainSize=%zu", chainSize);
+    // New slots start out as nil, i.e. "nothing to wait for".
+    events.resize(chainSize);
+    inFlightBuffers.resize(chainSize);
+    inFlightObjects.resize(chainSize);
+}
+
+// Waits for all submitted work, then drops every buffer, event and in-flight object.
+void MetalCommandPool::Term()
+{
+    for (id<MTLSharedEvent> event : events)
+    {
+        if (event != nil)
+            [event waitUntilSignaledValue:1 timeoutMS:UINT64_MAX];
+    }
+    inFlightObjects.clear();
+    inFlightBuffers.clear();
+    events.clear();
+    // Do not carry a half-open frame over to a later Init().
+    frameStarted = false;
+}
+
+// Moves to the next slot and recycles it once its previous work has completed.
+void MetalCommandPool::BeginFrame()
+{
+    if (frameStarted)
+        return;
+    frameStarted = true;
+    index = (index + 1) % chainSize;
+    if (events[index] != nil)
+        [events[index] waitUntilSignaledValue:1 timeoutMS:UINT64_MAX];
+    inFlightBuffers[index] = nil;
+    inFlightObjects[index].clear();
+}
+
+// Commits the current slot's command buffer and arranges for the slot's event
+// to be signaled when the buffer completes.
+void MetalCommandPool::EndFrame()
+{
+    if (!frameStarted)
+        return;
+    frameStarted = false;
+    id<MTLCommandBuffer> buffer = inFlightBuffers[index];
+    if (buffer == nil)
+    {
+        // Allocate() was never called this frame, so nothing would ever signal a
+        // new event and the next wait on this slot would block forever.
+        events[index] = nil;
+        return;
+    }
+    events[index] = [device newSharedEvent];
+    [buffer encodeSignalEvent:events[index] value:1];
+    [buffer commit];
+}
+
+// Returns the current slot's command buffer, creating it on first use.
+// Must be called between BeginFrame() and EndFrame().
+id<MTLCommandBuffer> MetalCommandPool::Allocate()
+{
+    verify(frameStarted);
+    if (inFlightBuffers[index] == nil)
+        inFlightBuffers[index] = [queue commandBuffer];
+    return inFlightBuffers[index];
+}
+
+// Ends the frame, waits for every slot to complete, then releases the objects
+// kept alive for the current slot.
+void MetalCommandPool::EndFrameAndWait()
+{
+    EndFrame();
+    for (id<MTLSharedEvent> event : events)
+    {
+        if (event != nil)
+            [event waitUntilSignaledValue:1 timeoutMS:UINT64_MAX];
+    }
+    inFlightObjects[index].clear();
+}
\ No newline at end of file
diff --git a/core/rend/metal/metal_context.h b/core/rend/metal/metal_context.h
new file mode 100644
index 0000000000..9024a72e87
--- /dev/null
+++ b/core/rend/metal/metal_context.h
@@ -0,0 +1,111 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+// NOTE(review): text between angle brackets was lost when this diff was
+// extracted: the two include targets below, the protocol qualifiers on `id`,
+// and the template arguments of std::vector / std::unique_ptr. They are left
+// as found because the class names are not visible here - restore from the
+// original commit.
+#include
+#include
+
+#include "rend/transform_matrix.h"
+#include "wsi/context.h"
+#include "metal_quad.h"
+
+// Graphics context for the Metal backend: owns the device, the command queue
+// and the layer, and presents rendered frames. A single instance exists at a
+// time and is reachable through Instance().
+class MetalContext : public GraphicsContext
+{
+public:
+    MetalContext();
+    ~MetalContext() override;
+
+    bool init();
+    void term() override;
+
+    // Only records the request; the swap chain is recreated later.
+    void resize() override { resized = true; }
+    // False while the drawable has a zero dimension.
+    bool IsValid() const { return width != 0 && height != 0; }
+    void NewFrame();
+    void BeginRenderPass();
+    void EndFrame();
+    void Present();
+    void PresentFrame(id texture, MTLViewport viewport, float aspectRatio);
+    void PresentLastFrame();
+    bool GetLastFrame(std::vector& data, int& width, int& height);
+
+    id GetDevice() const { return device; }
+    CAMetalLayer* GetLayer() const { return layer; }
+    id GetQueue() const { return queue; }
+    MTLRenderPassDescriptor* GetDescriptor() const { return renderPassDescriptor; }
+    id GetEncoder() const { return commandEncoder; }
+    id GetCommandBuffer() const { return commandBuffers[currentImage]; }
+    bool IsRendering() const { return rendering; }
+
+    std::string getDriverName() override;
+
+    std::string getDriverVersion() override {
+        return "";
+    }
+
+    bool isAMD() override {
+        return false;
+    }
+
+    bool hasPerPixel() override {
+        return false;
+    }
+    bool recreateSwapChainIfNeeded();
+
+    static MetalContext* Instance() { return contextInstance; }
+private:
+    void CreateSwapChain();
+    void DrawFrame(id texture, MTLViewport viewport, float aspectRatio);
+
+    bool HasSurfaceDimensionChanged() const;
+    void SetWindowSize(u32 width, u32 height);
+
+    bool rendering = false;
+    bool renderDone = false;
+    u32 width = 0;
+    u32 height = 0;
+    bool resized = false;
+    bool swapOnVSync = true;
+    int swapInterval = 1;
+
+    u32 currentImage = 0;
+
+    id currentDrawable = nil;
+    MTLRenderPassDescriptor *renderPassDescriptor = nil;
+
+    std::vector> commandBuffers;
+    id commandEncoder = nil;
+
+    std::unique_ptr quadPipelineWithAlpha;
+    std::unique_ptr quadPipeline;
+    std::unique_ptr quadRotatePipeline;
+    std::unique_ptr quadDrawer;
+    std::unique_ptr quadRotateDrawer;
+    std::unique_ptr shaderManager;
+
+    id lastFrameTexture = nil;
+    MTLViewport lastFrameViewport {};
+    float lastFrameAR = 0.f;
+
+    id device = nil;
+    id queue = nil;
+    // Only assigned by init() when built with USE_SDL; start from a known value.
+    CAMetalLayer* layer = nil;
+
+    static MetalContext* contextInstance;
+};
\ No newline at end of file
diff --git a/core/rend/metal/metal_context.mm b/core/rend/metal/metal_context.mm
new file mode 100644
index 0000000000..66f5f3f5dd
--- /dev/null
+++ b/core/rend/metal/metal_context.mm
@@ -0,0 +1,456 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+// NOTE(review): text between angle brackets (template arguments, protocol
+// qualifiers on `id`, the static_cast target) was lost when this diff was
+// extracted and is left as found - restore from the original commit.
+
+#include "metal_context.h"
+#include "metal_driver.h"
+#ifdef USE_SDL
+#include "sdl/sdl.h"
+#endif
+#include "ui/imgui_driver.h"
+#import "metal_buffer.h"
+
+MetalContext *MetalContext::contextInstance;
+
+// (Re)configures the layer for the current drawable size and re-initializes
+// the quad pipelines and drawers used to present frames.
+void MetalContext::CreateSwapChain()
+{
+    // WAIT IDLE
+
+    commandBuffers.clear();
+
+    [layer setPixelFormat:MTLPixelFormatBGRA8Unorm];
+    [layer setFramebufferOnly:TRUE];
+    // CGColorSpaceCreateWithName returns an object we own; the layer keeps its
+    // own reference, so release ours instead of leaking one per call.
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB);
+    [layer setColorspace:colorSpace];
+    CGColorSpaceRelease(colorSpace);
+    [layer setMaximumDrawableCount:3];
+    // displaySyncEnabled does not exist on iOS/tvOS. TARGET_OS_MAC is defined
+    // to 1 on every Apple platform, so test for macOS proper instead.
+    // NOTE(review): sync is always enabled here even though Present() recreates
+    // the swap chain whenever swapOnVSync changes - confirm whether this should
+    // pass swapOnVSync.
+#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
+    [layer setDisplaySyncEnabled:TRUE];
+#endif
+
+    // Let SetWindowSize() update width/height: assigning them first made it a
+    // no-op, so settings.display was never refreshed from the drawable size.
+    auto size = [layer drawableSize];
+    SetWindowSize((u32)size.width, (u32)size.height);
+    resized = false;
+
+    if (swapOnVSync && config::DupeFrames && settings.display.refreshRate > 60.f)
+        swapInterval = settings.display.refreshRate / 60.f;
+    else
+        swapInterval = 1;
+
+    commandBuffers.resize(3);
+
+    quadPipeline->Init(shaderManager.get());
+    quadPipelineWithAlpha->Init(shaderManager.get());
+    quadDrawer->Init(quadPipeline.get());
+    quadRotatePipeline->Init(shaderManager.get());
+    quadRotateDrawer->Init(quadRotatePipeline.get());
+
+    // NewFrame() increments before use, so the first frame uses image 0.
+    currentImage = 2;
+
+    INFO_LOG(RENDERER, "Metal swap chain created: %d x %d, swap chain size %d", width, height, 3);
+}
+
+// Creates the window/layer (SDL builds), the device, the queue and the helper
+// objects, then the swap chain. Returns false on failure.
+bool MetalContext::init()
+{
+    GraphicsContext::instance = this;
+
+#ifdef USE_SDL
+    if (!sdl_recreate_window(SDL_WINDOW_METAL))
+        return false;
+
+    auto view = SDL_Metal_CreateView((SDL_Window *)window);
+
+    if (view == nullptr) {
+        term();
+        ERROR_LOG(RENDERER, "Failed to create SDL Metal View");
+        return false;
+    }
+
+    layer = static_cast(SDL_Metal_GetLayer(view));
+#endif
+
+    device = MTLCreateSystemDefaultDevice();
+
+    if (!device) {
+        term();
+        NOTICE_LOG(RENDERER, "Metal Device is null.");
+        return false;
+    }
+
+    [layer setDevice:device];
+    queue = [device newCommandQueue];
+
+    shaderManager = std::make_unique();
+    quadPipeline = std::make_unique(true, false);
+    quadPipelineWithAlpha = std::make_unique(false, false);
+    quadDrawer = std::make_unique();
+    quadRotatePipeline = std::make_unique(true, true);
+    quadRotateDrawer = std::make_unique();
+
+    NOTICE_LOG(RENDERER, "Created Metal view.");
+
+    imguiDriver = std::unique_ptr(new MetalDriver());
+
+    CreateSwapChain();
+
+    return true;
+}
+
+std::string MetalContext::getDriverName() {
+    return [[device name] UTF8String];
+}
+
+// Recreates the swap chain if a resize was requested or the drawable size
+// changed. Returns true if it was recreated.
+bool MetalContext::recreateSwapChainIfNeeded()
+{
+    if (resized || HasSurfaceDimensionChanged())
+    {
+        CreateSwapChain();
+        lastFrameTexture = nil;
+        return true;
+    }
+    else
+        return false;
+}
+
+// Acquires the next drawable and opens a render command encoder targeting it.
+void MetalContext::BeginRenderPass() {
+    recreateSwapChainIfNeeded();
+    if (!IsValid())
+        return;
+
+    // NOTE(review): nextDrawable can return nil; that case is not handled here.
+    currentDrawable = [layer nextDrawable];
+
+    if (!renderPassDescriptor) {
+        renderPassDescriptor = [[MTLRenderPassDescriptor alloc] init];
+    }
+
+    auto colorAttachment = renderPassDescriptor.colorAttachments[0];
+    [colorAttachment setTexture:currentDrawable.texture];
+    [colorAttachment setLoadAction:MTLLoadActionClear];
+    [colorAttachment setStoreAction:MTLStoreActionStore];
+    [colorAttachment setClearColor:MTLClearColorMake(VO_BORDER_COL.red(), VO_BORDER_COL.green(), VO_BORDER_COL.blue(), 1.0f)];
+
+    if (currentImage >= commandBuffers.size()) {
+        commandBuffers.resize(currentImage + 1);
+    }
+
+    if (!commandBuffers[currentImage]) {
+        commandBuffers[currentImage] = [queue commandBuffer];
+        [commandBuffers[currentImage] setLabel:@"Render Frame"];
+    }
+
+    commandEncoder = [commandBuffers[currentImage] renderCommandEncoderWithDescriptor: renderPassDescriptor];
+    [commandBuffers[currentImage] presentDrawable:currentDrawable];
+}
+
+// Advances to the next swap chain image and marks the context as rendering.
+void MetalContext::NewFrame() {
+    if (!IsValid())
+        return;
+
+    currentImage = (currentImage + 1) % 3;
+    currentDrawable = nil;
+    verify(!rendering);
+    rendering = true;
+}
+
+// Closes the encoder, commits the frame's command buffer and blocks until the
+// GPU has finished it.
+void MetalContext::EndFrame() {
+    if (!IsValid())
+        return;
+
+    [commandEncoder endEncoding];
+    [commandBuffers[currentImage] commit];
+    [commandBuffers[currentImage] waitUntilCompleted];
+    commandBuffers[currentImage] = nil;
+
+    verify(rendering);
+    rendering = false;
+    renderDone = true;
+}
+
+// Re-presents the last frame (swapInterval - 1) extra times, then recreates
+// the swap chain if the vsync setting changed or a resize is pending.
+void MetalContext::Present()
+{
+    if (renderDone)
+    {
+        if (lastFrameTexture != nil && IsValid() && !gui_is_open())
+            for (int i = 1; i < swapInterval; i++)
+            {
+                PresentFrame(lastFrameTexture, lastFrameViewport, lastFrameAR);
+            }
+        renderDone = false;
+    }
+    if (swapOnVSync == (settings.input.fastForwardMode || !config::VSync))
+    {
+        swapOnVSync = (!settings.input.fastForwardMode && config::VSync);
+        resized = true;
+    }
+    if (resized) {
+        CreateSwapChain();
+        lastFrameTexture = nil;
+    }
+}
+
+// Draws `texture` as a quad with the current encoder, letterboxed to
+// `aspectRatio` and offset by the configured video shift.
+void MetalContext::DrawFrame(id texture, MTLViewport viewport, float aspectRatio) {
+    MetalQuadVertex vtx[4] {
+        { -1, -1, 0, 0, 1 },
+        {  1, -1, 0, 1, 1 },
+        { -1,  1, 0, 0, 0 },
+        {  1,  1, 0, 1, 0 },
+    };
+    float shiftX, shiftY;
+    getVideoShift(shiftX, shiftY);
+    vtx[0].x = vtx[2].x = -1.f + shiftX * 2.f / viewport.width;
+    vtx[1].x = vtx[3].x = vtx[0].x + 2;
+    vtx[0].y = vtx[1].y = -1.f + shiftY * 2.f / viewport.height;
+    vtx[2].y = vtx[3].y = vtx[0].y + 2;
+
+    [commandEncoder pushDebugGroup:@"DrawFrame"];
+
+    if (config::Rotate90)
+        quadRotatePipeline->BindPipeline(commandEncoder);
+    else
+        quadPipeline->BindPipeline(commandEncoder);
+
+    // Center the frame: bars go above/below when the frame is wider than the
+    // screen, left/right otherwise.
+    float screenAR = (float)width / height;
+    float dx = 0;
+    float dy = 0;
+    if (aspectRatio > screenAR)
+        dy = height * (1 - screenAR / aspectRatio) / 2;
+    else
+        dx = width * (1 - aspectRatio / screenAR) / 2;
+
+    MTLViewport framePort = { dx, dy, width - dx * 2, height - dy * 2, 0, 1 };
+    [commandEncoder setViewport:framePort];
+    [commandEncoder setScissorRect:MTLScissorRect { (uint)dx, (uint)dy, (uint)(width - dx * 2), (uint)(height - dy * 2) }];
+    if (config::Rotate90)
+        quadRotateDrawer->Draw(commandEncoder, texture, vtx, config::TextureFiltering == 1);
+    else
+        quadDrawer->Draw(commandEncoder, texture, vtx, config::TextureFiltering == 1);
+
+    [commandEncoder popDebugGroup];
+}
+
+// Remembers the frame as the "last frame" and renders it, the OSD and the
+// ImGui draw data to the next drawable.
+void MetalContext::PresentFrame(id texture, MTLViewport viewport, float aspectRatio)
+{
+    lastFrameTexture = texture;
+    lastFrameViewport = viewport;
+    lastFrameAR = aspectRatio;
+
+    if (texture != nil && IsValid())
+    {
+        NewFrame();
+
+        BeginRenderPass();
+
+        gui_draw_osd();
+
+        if (lastFrameTexture != nil) // Might have been nullified if swap chain recreated
+            DrawFrame(texture, viewport, aspectRatio);
+
+        imguiDriver->renderDrawData(ImGui::GetDrawData(), false);
+        EndFrame();
+    }
+    else {
+        if (!IsValid())
+        {
+            ERROR_LOG(RENDERER, "NOT PRESENTING INVALID SIZE!");
+        }
+    }
+}
+
+// Draws the remembered frame with the current encoder, if there is one.
+void MetalContext::PresentLastFrame()
+{
+    if (lastFrameTexture != nil && IsValid())
+        DrawFrame(lastFrameTexture, lastFrameViewport, lastFrameAR);
+}
+
+void MetalContext::term() {
+    GraphicsContext::instance = nullptr;
+    lastFrameTexture = nil;
+    imguiDriver.reset();
+    quadDrawer.reset();
+    quadPipeline.reset();
+    quadPipelineWithAlpha.reset();
+    quadRotateDrawer.reset();
+    quadRotatePipeline.reset();
+    shaderManager.reset();
+    commandBuffers.clear();
+}
+
+bool MetalContext::HasSurfaceDimensionChanged() const
+{
+    auto size = [layer drawableSize];
+    return width != size.width || height != size.height;
+}
+
+// Records a new drawable size, mirrors non-zero dimensions into
+// settings.display and flags the context as resized.
+void MetalContext::SetWindowSize(u32 width, u32 height)
+{
+    // A change of either dimension counts (was `&&`, which ignored resizes
+    // along a single axis).
+    if (this->width != width || this->height != height)
+    {
+        this->width = width;
+        this->height = height;
+
+        if (width != 0)
+            settings.display.width = width;
+
+        if (height != 0)
+            settings.display.height = height;
+
+        resize();
+    }
+}
+
+MetalContext::MetalContext() {
+    verify(contextInstance == nullptr);
+    contextInstance = this;
+}
+
+MetalContext::~MetalContext() {
+    verify(contextInstance == this);
+    contextInstance = nullptr;
+}
+
+// Renders the last presented frame into an off-screen target and returns it as
+// tightly packed RGB bytes. If `width` or `height` is non-zero on entry the
+// other one is derived from the frame's aspect ratio; if both are zero the
+// size comes from the last viewport. Returns false if no frame is available
+// or the GPU work fails.
+bool MetalContext::GetLastFrame(std::vector &data, int &width, int &height)
+{
+    // The aspect ratio is used as a divisor below.
+    if (lastFrameTexture == nil || lastFrameAR <= 0.f)
+        return false;
+
+    if (width != 0) {
+        height = width / lastFrameAR;
+    }
+    else if (height != 0) {
+        width = lastFrameAR * height;
+    }
+    else
+    {
+        width = lastFrameViewport.width;
+        height = lastFrameViewport.height;
+        if (config::Rotate90)
+            std::swap(width, height);
+        // We need square pixels for PNG
+        int w = lastFrameAR * height;
+        if (width > w)
+            height = width / lastFrameAR;
+        else
+            width = w;
+    }
+    // A texture or buffer with a zero dimension cannot be created.
+    if (width <= 0 || height <= 0)
+        return false;
+
+    MTLTextureDescriptor *renderTargetDesc = [[MTLTextureDescriptor alloc] init];
+    renderTargetDesc.width = width;
+    renderTargetDesc.height = height;
+    renderTargetDesc.pixelFormat = MTLPixelFormatRGBA8Unorm;
+    renderTargetDesc.usage = MTLTextureUsageRenderTarget;
+    renderTargetDesc.storageMode = MTLStorageModePrivate;
+
+    id renderTarget = [device newTextureWithDescriptor:renderTargetDesc];
+    [renderTarget setLabel:@"Screenshot Render Target"];
+
+    NSUInteger bytesPerPixel = 4;
+    NSUInteger bytesPerRow = width * bytesPerPixel;
+    NSUInteger bufferSize = bytesPerRow * height;
+
+    id readbackBuffer = [device newBufferWithLength:bufferSize
+                                            options:MTLResourceStorageModeShared];
+    [readbackBuffer setLabel:@"Screenshot Readback Buffer"];
+
+    id commandBuffer = [queue commandBuffer];
+    [commandBuffer setLabel:@"GetLastFrame"];
+
+    MTLRenderPassDescriptor *renderPassDesc = [[MTLRenderPassDescriptor alloc] init];
+    renderPassDesc.colorAttachments[0].texture = renderTarget;
+    renderPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear;
+    renderPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore;
+    renderPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 1);
+
+    id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDesc];
+    [renderEncoder setLabel:@"GetLastFrame Render"];
+
+    MTLViewport viewport = {
+        0.0, 0.0,
+        (double)width, (double)height,
+        0.0, 1.0
+    };
+    [renderEncoder setViewport:viewport];
+
+    MTLScissorRect scissor = {
+        0, 0,
+        (NSUInteger)width, (NSUInteger)height
+    };
+    [renderEncoder setScissorRect:scissor];
+
+    MetalQuadVertex vtx[4] = {
+        { -1.f, -1.f, 0.f, 0.f, 1.f },
+        {  1.f, -1.f, 0.f, 1.f, 1.f },
+        { -1.f,  1.f, 0.f, 0.f, 0.f },
+        {  1.f,  1.f, 0.f, 1.f, 0.f },
+    };
+
+    if (config::Rotate90) {
+        quadRotatePipeline->BindPipeline(renderEncoder);
+        quadRotateDrawer->Draw(renderEncoder, lastFrameTexture, vtx, false);
+    } else {
+        quadPipeline->BindPipeline(renderEncoder);
+        quadDrawer->Draw(renderEncoder, lastFrameTexture, vtx, false);
+    }
+
+    [renderEncoder endEncoding];
+
+    // Copy from render target to buffer
+    id blitEncoder = [commandBuffer blitCommandEncoder];
+    [blitEncoder setLabel:@"GetLastFrame Blit"];
+
+    MTLOrigin sourceOrigin = MTLOriginMake(0, 0, 0);
+    MTLSize sourceSize = MTLSizeMake(width, height, 1);
+
+    [blitEncoder copyFromTexture:renderTarget
+                     sourceSlice:0
+                     sourceLevel:0
+                    sourceOrigin:sourceOrigin
+                      sourceSize:sourceSize
+                        toBuffer:readbackBuffer
+               destinationOffset:0
+          destinationBytesPerRow:bytesPerRow
+        destinationBytesPerImage:bufferSize];
+
+    [blitEncoder endEncoding];
+
+    [commandBuffer commit];
+    [commandBuffer waitUntilCompleted];
+
+    if (commandBuffer.status != MTLCommandBufferStatusCompleted) {
+        NSError *error = commandBuffer.error;
+        WARN_LOG(RENDERER, "MetalContext::GetLastFrame: Command buffer failed: %s",
+                 error ? error.localizedDescription.UTF8String : "Unknown error");
+        return false;
+    }
+
+    // Read back the data
+    const u8 *img = (const u8 *)[readbackBuffer contents];
+    data.clear();
+
+    data.reserve(width * height * 3);
+    // RGBA -> RGB conversion
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            data.push_back(*img++); // R
+            data.push_back(*img++); // G
+            data.push_back(*img++); // B
+            img++; // Skip A
+        }
+    }
+
+    return true;
+}
diff --git a/core/rend/metal/metal_drawer.h b/core/rend/metal/metal_drawer.h
new file mode 100644
index 0000000000..7de2982880
--- /dev/null
+++ b/core/rend/metal/metal_drawer.h
@@ -0,0 +1,315 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+// NOTE(review): text between angle brackets (the last include target, template
+// parameter lists and arguments, protocol qualifiers on `id`) was lost when
+// this diff was extracted and is left as found - restore from the original
+// commit.
+#include "metal_texture.h"
+#include "metal_shaders.h"
+#include "metal_pipeline.h"
+#include "metal_buffer.h"
+#include "metal_commandpool.h"
+#include "rend/tileclip.h"
+#include "rend/transform_matrix.h"
+#include "rend/sorter.h"
+#include "hw/pvr/pvr_mem.h"
+#include
+
+// Shared helpers for the Metal drawers: scissor tracking, a pool of main
+// buffers recycled through the command pool, and uniform packing.
+class MetalBaseDrawer
+{
+public:
+    void SetCommandPool(MetalCommandPool *commandPool) { this->commandPool = commandPool; }
+
+protected:
+    TileClipping SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect);
+    void SetBaseScissor(MTLViewport viewport);
+
+    // Sets the scissor rectangle on the encoder only if it differs from the
+    // last one set through this method.
+    void SetScissor(id encoder, const MTLScissorRect& scissor)
+    {
+        if (scissor.x != currentScissor.x ||
+            scissor.y != currentScissor.y ||
+            scissor.width != currentScissor.width ||
+            scissor.height != currentScissor.height)
+        {
+            [encoder setScissorRect:scissor];
+            currentScissor = scissor;
+        }
+    }
+
+    // Returns a buffer of at least `size` bytes for the current frame. The
+    // buffer is returned to the pool when the command pool releases the frame's
+    // in-flight objects, so the caller must not delete it.
+    MetalBufferData* GetMainBuffer(u32 size)
+    {
+        MetalBufferData *buffer;
+        if (!mainBuffers.empty())
+        {
+            buffer = mainBuffers.back().release();
+            mainBuffers.pop_back();
+            if (buffer->bufferSize < size) {
+                u32 newSize = (u32)buffer->bufferSize;
+                while (newSize < size)
+                    newSize *= 2;
+                INFO_LOG(RENDERER, "Increasing main buffer size %zd -> %d", (size_t)buffer->bufferSize, newSize);
+                // The deleter now owns the old buffer and frees it once the
+                // command pool drops it. It must not also be deleted here: that
+                // freed it twice.
+                commandPool->addToFlight(new MetalDeleter(buffer));
+
+                buffer = new MetalBufferData(newSize);
+            }
+        }
+        else
+        {
+            buffer = new MetalBufferData(std::max(512 * 1024u, size));
+        }
+
+        // Puts the buffer back into mainBuffers when destroyed by the command pool.
+        class BufferHolder : public MetalDeletable
+        {
+        public:
+            BufferHolder(MetalBufferData *buffer, MetalBaseDrawer *drawer) : buffer(buffer), drawer(drawer) {}
+
+            ~BufferHolder() override {
+                drawer->mainBuffers.emplace_back(buffer);
+            }
+
+        private:
+            MetalBufferData *buffer;
+            MetalBaseDrawer *drawer;
+        };
+        commandPool->addToFlight(new BufferHolder(buffer, this));
+
+        return buffer;
+    }
+
+    // Fills a fragment uniform struct of type T from the fog registers, the
+    // color clamp values of the current render context and the alpha reference.
+    template
+    T MakeFragmentUniforms()
+    {
+        T fragUniforms;
+
+        //VERT and RAM fog color constants
+        FOG_COL_VERT.getRGBColor(fragUniforms.sp_FOG_COL_VERT);
+        FOG_COL_RAM.getRGBColor(fragUniforms.sp_FOG_COL_RAM);
+
+        //Fog density constant
+        fragUniforms.sp_FOG_DENSITY = FOG_DENSITY.get() * config::ExtraDepthScale;
+
+        pvrrc.fog_clamp_min.getRGBAColor(fragUniforms.colorClampMin);
+        pvrrc.fog_clamp_max.getRGBAColor(fragUniforms.colorClampMax);
+
+        fragUniforms.cp_AlphaTestValue = (PT_ALPHA_REF & 0xFF) / 255.0f;
+
+        return fragUniforms;
+    }
+
+    // Builds one 16-byte aligned uniform record per polygon / modifier volume
+    // (only Naomi 2 entries are filled in), adds the whole array to `packer`
+    // and stores the start offset of each list in `offsets`.
+    template
+    void packNaomi2Uniforms(MetalBufferPacker& packer, Offsets& offsets, std::vector& n2uniforms, bool trModVolIncluded)
+    {
+        size_t n2UniformSize = sizeof(MetalN2VertexShaderUniforms) + MetalBufferPacker::align(sizeof(MetalN2VertexShaderUniforms), 16);
+        int items = pvrrc.global_param_op.size() + pvrrc.global_param_pt.size() + pvrrc.global_param_tr.size() + pvrrc.global_param_mvo.size();
+        if (trModVolIncluded)
+            items += pvrrc.global_param_mvo_tr.size();
+        n2uniforms.resize(items * n2UniformSize);
+        size_t bufIdx = 0;
+        auto addUniform = [&](const PolyParam& pp, int polyNumber) {
+            if (pp.isNaomi2())
+            {
+                MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx];
+                memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[pp.mvMatrix].mat, sizeof(uni.mvMat));
+                memcpy(glm::value_ptr(uni.normalMat), pvrrc.matrices[pp.normalMatrix].mat, sizeof(uni.normalMat));
+                memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[pp.projMatrix].mat, sizeof(uni.projMat));
+                uni.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap;
+                uni.polyNumber = polyNumber;
+                for (size_t i = 0; i < 2; i++)
+                {
+                    uni.envMapping[i] = pp.envMapping[i];
+                    uni.glossCoef[i] = pp.glossCoef[i];
+                    uni.constantColor[i] = pp.constantColor[i];
+                }
+            }
+            // Every entry occupies a slot, filled or not.
+            bufIdx += n2UniformSize;
+        };
+        for (const PolyParam& pp : pvrrc.global_param_op)
+            addUniform(pp, 0);
+        size_t ptOffset = bufIdx;
+        for (const PolyParam& pp : pvrrc.global_param_pt)
+            addUniform(pp, 0);
+        size_t trOffset = bufIdx;
+        if (!pvrrc.global_param_tr.empty())
+        {
+            u32 firstVertexIdx = pvrrc.idx[pvrrc.global_param_tr[0].first];
+            for (const PolyParam& pp : pvrrc.global_param_tr)
+                addUniform(pp, ((&pp - &pvrrc.global_param_tr[0]) << 17) - firstVertexIdx);
+        }
+        size_t mvOffset = bufIdx;
+        for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo)
+        {
+            if (mvp.isNaomi2())
+            {
+                MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx];
+                memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[mvp.mvMatrix].mat, sizeof(uni.mvMat));
+                memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[mvp.projMatrix].mat, sizeof(uni.projMat));
+            }
+            bufIdx += n2UniformSize;
+        }
+        size_t trMvOffset = bufIdx;
+        if (trModVolIncluded)
+            for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo_tr)
+            {
+                if (mvp.isNaomi2())
+                {
+                    MetalN2VertexShaderUniforms& uni = *(MetalN2VertexShaderUniforms *)&n2uniforms[bufIdx];
+                    memcpy(glm::value_ptr(uni.mvMat), pvrrc.matrices[mvp.mvMatrix].mat, sizeof(uni.mvMat));
+                    memcpy(glm::value_ptr(uni.projMat), pvrrc.matrices[mvp.projMatrix].mat, sizeof(uni.projMat));
+                }
+                bufIdx += n2UniformSize;
+            }
+        offsets.naomi2OpaqueOffset = packer.addUniform(n2uniforms.data(), bufIdx);
+        offsets.naomi2PunchThroughOffset = offsets.naomi2OpaqueOffset + ptOffset;
+        offsets.naomi2TranslucentOffset = offsets.naomi2OpaqueOffset + trOffset;
+        offsets.naomi2ModVolOffset = offsets.naomi2OpaqueOffset + mvOffset;
+        offsets.naomi2TrModVolOffset = offsets.naomi2OpaqueOffset + trMvOffset;
+    }
+
+    // Adds the light models of the current render context to `packer`.
+    // Returns their offset, or (u64)-1 when there are none.
+    u64 packNaomi2Lights(MetalBufferPacker& packer)
+    {
+        u64 offset = -1;
+
+        if (!pvrrc.lightModels.empty())
+        {
+            offset = packer.addUniform(&pvrrc.lightModels[0], pvrrc.lightModels.size() * sizeof(decltype(pvrrc.lightModels[0])));
+        }
+
+        return offset;
+    }
+
+    // Maps a cull mode value to Metal: 3 -> back, 2 -> front, anything else -> none.
+    MTLCullMode toMetalCullMode(int cullMode) {
+        return cullMode == 3 ? MTLCullModeBack
+             : cullMode == 2 ? MTLCullModeFront
+             : MTLCullModeNone;
+    }
+
+    MTLScissorRect baseScissor {};
+    MTLScissorRect currentScissor {};
+    TransformMatrix matrices;
+    MetalCommandPool *commandPool = nullptr;
+    std::vector> mainBuffers;
+};
+
+class MetalDrawer : public MetalBaseDrawer
+{
+public:
+    virtual ~MetalDrawer() = default;
+
+    bool Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture);
+    virtual void EndRenderPass() {
+        renderPassStarted = false;
+    }
+
+    virtual void Term() {
+
+    }
+
+protected:
+    virtual id BeginRenderPass() = 0;
+    void Init(MetalSamplers *samplers, MetalPipelineManager pipelineManager) {
+        this->samplers = samplers;
+        this->pipelineManager = std::make_unique(pipelineManager);
+    }
+
+    int GetCurrentImage() const { return imageIndex; }
+
+    id currentEncoder = nil;
+    MetalSamplers *samplers = nullptr;
+    bool renderPassStarted = false;
+
+private:
+    void DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count);
+    void DrawSorted(id encoder, const std::vector& polys, u32 first, u32 last, bool multipass);
+    void DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector& polys, u32 first, u32 last);
+    void DrawModVols(id encoder, int first, int count);
+    void UploadMainBuffer(const MetalVertexShaderUniforms& vertexUniforms, const MetalFragmentShaderUniforms& fragmentUniforms);
+
+    int imageIndex = 0;
+    struct {
+        u64 indexOffset = 0;
+        u64 modVolOffset = 0;
+        u64 vertexUniformOffset = 0;
+        u64 fragmentUniformOffset = 0;
+        u64 naomi2OpaqueOffset = 0;
+        u64 naomi2PunchThroughOffset = 0;
+ u64 naomi2TranslucentOffset = 0; + u64 naomi2ModVolOffset = 0; + u64 naomi2TrModVolOffset = 0; + u64 lightsOffset = 0; + } offsets; + id curMainBuffer = nil; + std::unique_ptr pipelineManager = nullptr; + bool dithering = false; +}; + +class MetalScreenDrawer : public MetalDrawer +{ +public: + void Init(MetalSamplers *samplers, MetalShaders *shaders, const MTLViewport& viewport); + + void EndRenderPass() override; + bool PresentFrame() + { + EndRenderPass(); + if (!frameRendered) + return false; + frameRendered = false; + MetalContext::Instance()->PresentFrame(framebuffers[GetCurrentImage()], viewport, aspectRatio); + + return true; + } + +protected: + id BeginRenderPass() override; + +private: + std::vector> framebuffers; + std::vector loadPassDescriptors; + std::vector clearPassDescriptors; + id depthAttachment; + MTLViewport viewport; + MetalShaders *shaderManager = nullptr; + std::vector clearNeeded; + bool frameRendered = false; + float aspectRatio = 0.f; + bool emulateFramebuffer = false; +}; + +class MetalTextureDrawer : public MetalDrawer +{ +public: + void Init(MetalSamplers *samplers, MetalShaders *shaders, MetalTextureCache *textureCache); + + void EndRenderPass() override; + +protected: + id BeginRenderPass() override; + +private: + u32 width = 0; + u32 height = 0; + u32 textureAddr = 0; + + MetalTexture *texture = nullptr; + std::vector> framebuffers; + MTLRenderPassDescriptor *rttPassDescriptor = nil; + id colorAttachment; + id depthAttachment; + MetalTextureCache *textureCache = nullptr; +}; \ No newline at end of file diff --git a/core/rend/metal/metal_drawer.mm b/core/rend/metal/metal_drawer.mm new file mode 100644 index 0000000000..f815bdb6bb --- /dev/null +++ b/core/rend/metal/metal_drawer.mm @@ -0,0 +1,741 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. 
+ + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ + +#include "metal_drawer.h" + +TileClipping MetalBaseDrawer::SetTileClip(id encoder, u32 val, MTLScissorRect& clipRect) { + int rect[4] = {}; + TileClipping clipMode = GetTileClip(val, matrices.GetViewportMatrix(), rect); + if (clipMode != TileClipping::Off) + { + clipRect.x = rect[0]; + clipRect.y = rect[1]; + clipRect.width = rect[2]; + clipRect.height = rect[3]; + } + if (clipMode == TileClipping::Outside) + SetScissor(encoder, clipRect); + else + SetScissor(encoder, baseScissor); + + return clipMode; +} + +void MetalBaseDrawer::SetBaseScissor(MTLViewport viewport) { + bool wide_screen_on = config::Widescreen + && !matrices.IsClipped() && !config::Rotate90 && !config::EmulateFramebuffer; + if (!wide_screen_on) + { + float width; + float height; + float min_x; + float min_y; + glm::vec4 clip_min(pvrrc.fb_X_CLIP.min, pvrrc.fb_Y_CLIP.min, 0, 1); + glm::vec4 clip_dim(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1, + pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1, 0, 0); + clip_min = matrices.GetScissorMatrix() * clip_min; + clip_dim = matrices.GetScissorMatrix() * clip_dim; + + min_x = clip_min[0]; + min_y = clip_min[1]; + width = clip_dim[0]; + height = clip_dim[1]; + if (width < 0) + { + min_x += width; + width = -width; + } + if (height < 0) + { + min_y += height; + height = -height; + } + + baseScissor = MTLScissorRect(); + baseScissor.x = std::max(lroundf(min_x), 0L); + 
baseScissor.y = std::max(lroundf(min_y), 0L); + baseScissor.width = std::max(lroundf(width), 0L); + baseScissor.height = std::max(lroundf(height), 0L); + } + else + { + baseScissor = MTLScissorRect(); + baseScissor.x = 0; + baseScissor.y = 0; + baseScissor.width = viewport.width; + baseScissor.height = viewport.height; + } +} + +void MetalDrawer::DrawPoly(id encoder, u32 listType, bool sortTriangles, const PolyParam &poly, u32 first, u32 count) +{ + MTLScissorRect scissorRect {}; + TileClipping tileClip = SetTileClip(encoder, poly.tileclip, scissorRect); + + float trilinearAlpha = 1.0f; + if (poly.tsp.FilterMode > 1 && poly.pcw.Texture && listType != ListType_Punch_Through && poly.tcw.MipMapped == 1) + { + trilinearAlpha = 0.25f * (poly.tsp.MipMapD & 0x3); + if (poly.tsp.FilterMode == 2) + // Trilinear pass A + trilinearAlpha = 1.0f - trilinearAlpha; + } + int gpuPalette = poly.texture == nullptr || !poly.texture->gpuPalette ? 0 + : poly.tsp.FilterMode + 1; + float palette_index = 0.0f; + if (gpuPalette != 0) + { + if (config::TextureFiltering == 1) + gpuPalette = 1; + else if (config::TextureFiltering == 2) + gpuPalette = 2; + if (poly.tcw.PixelFmt == PixelPal4) + palette_index = float(poly.tcw.PalSelect << 4) / 1023.0f; + else + palette_index = float(poly.tcw.PalSelect >> 4 << 8) / 1023.0f; + } + + std::array pushConstants; + + if (tileClip == TileClipping::Inside || trilinearAlpha != 1.0f || gpuPalette != 0) + { + pushConstants = { + (float)scissorRect.x, + (float)scissorRect.y, + (float)scissorRect.x + (float)scissorRect.width, + (float)scissorRect.y + (float)scissorRect.height, + trilinearAlpha, + palette_index + }; + } else { + pushConstants = { 0, 0, 0, 0, 0, 0 }; + } + + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + + bool shadowed = listType == ListType_Opaque || listType == ListType_Punch_Through; + + [encoder 
setRenderPipelineState:pipelineManager->GetPipeline(listType, sortTriangles, poly, gpuPalette, dithering)]; + [encoder setDepthStencilState:pipelineManager->GetDepthStencilStates(listType, sortTriangles, shadowed, poly)]; + [encoder setCullMode:toMetalCullMode(poly.isp.CullMode)]; + + if (shadowed) { + if (poly.pcw.Shadow != 0) { + [encoder setStencilReferenceValue:0x80]; + } else { + [encoder setStencilReferenceValue:0]; + } + } + + if (poly.texture != nullptr) { + auto texture = ((MetalTexture *)poly.texture)->GetReadOnlyTexture(); + [encoder setFragmentTexture:texture atIndex:0]; + + // Texture sampler + [encoder setFragmentSamplerState:samplers->GetSampler(poly, listType == ListType_Punch_Through) atIndex:0]; + } + + if (poly.pcw.Texture || poly.isNaomi2()) + { + u64 offset = 0; + u32 index = 0; + if (poly.isNaomi2()) + { + switch (listType) + { + case ListType_Opaque: + offset = offsets.naomi2OpaqueOffset; + index = &poly - &pvrrc.global_param_op[0]; + break; + case ListType_Punch_Through: + offset = offsets.naomi2PunchThroughOffset; + index = &poly - &pvrrc.global_param_pt[0]; + break; + case ListType_Translucent: + offset = offsets.naomi2TranslucentOffset; + index = &poly - &pvrrc.global_param_tr[0]; + break; + } + } + + size_t size = sizeof(MetalN2VertexShaderUniforms) + MetalBufferPacker::align(sizeof(MetalN2VertexShaderUniforms), 16); + [encoder setVertexBuffer:curMainBuffer offset:offset + index * size atIndex:1]; + + if (offsets.lightsOffset != -1) { + size = sizeof(N2LightModel) + MetalBufferPacker::align(sizeof(N2LightModel), 16); + [encoder setVertexBuffer:curMainBuffer offset:offsets.lightsOffset + poly.lightModel * size atIndex:2]; + } + } + + MTLPrimitiveType primitive = sortTriangles && !config::PerStripSorting ? 
MTLPrimitiveTypeTriangle : MTLPrimitiveTypeTriangleStrip; + + [encoder drawIndexedPrimitives:primitive + indexCount:count + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + first * sizeof(u32)]; +} + +void MetalDrawer::DrawSorted(id encoder, const std::vector &polys, u32 first, u32 last, bool multipass) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawSorted"]; + + for (u32 idx = first; idx < last; idx++) + DrawPoly(encoder, ListType_Translucent, true, pvrrc.global_param_tr[polys[idx].polyIndex], polys[idx].first, polys[idx].count); + if (multipass && config::TranslucentPolygonDepthMask) + { + // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) + for (u32 idx = first; idx < last; idx++) + { + const SortedTriangle& param = polys[idx]; + const PolyParam& polyParam = pvrrc.global_param_tr[param.polyIndex]; + if (polyParam.isp.ZWriteDis) + continue; + [encoder setRenderPipelineState:pipelineManager->GetDepthPassPipeline(polyParam.isNaomi2())]; + [encoder setDepthStencilState:pipelineManager->GetDepthPassDepthStencilStates(polyParam.isNaomi2())]; + + MTLScissorRect scissorRect {}; + SetTileClip(encoder, polyParam.tileclip, scissorRect); + + [encoder setCullMode:toMetalCullMode(polyParam.isp.CullMode)]; + + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:param.count + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset + param.first * sizeof(u32)]; + } + } + + [encoder popDebugGroup]; +} + +void MetalDrawer::DrawList(id encoder, u32 listType, bool sortTriangles, const std::vector &polys, u32 first, u32 last) +{ + if (first == last) + return; + + [encoder pushDebugGroup:@"DrawList"]; + + const PolyParam *pp_end = polys.data() + last; + for (const PolyParam *pp = &polys[first]; pp != pp_end; pp++) + if (pp->count > 2) + DrawPoly(encoder, listType, sortTriangles, *pp, pp->first, pp->count); + + [encoder 
popDebugGroup]; +} + +void MetalDrawer::DrawModVols(id encoder, int first, int count) +{ + if (count == 0 || pvrrc.modtrig.empty() || !config::ModifierVolumes) + return; + + [encoder pushDebugGroup:@"DrawModVols"]; + [encoder setVertexBufferOffset:offsets.modVolOffset atIndex:30]; + + ModifierVolumeParam* params = &pvrrc.global_param_mvo[first]; + + int mod_base = -1; + id state; + id depth_state; + + const std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f }; + [encoder setFragmentBytes:pushConstants.data() length:sizeof(pushConstants) + MetalBufferPacker::align(sizeof(pushConstants), 16) atIndex:1]; + + for (int cmv = 0; cmv < count; cmv++) { + ModifierVolumeParam& param = params[cmv]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + if (mod_base == -1) + mod_base = param.first; + + if (!param.isp.VolumeLast && mv_mode > 0) { + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isNaomi2()); // OR'ing (open volume or quad) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Or, param.isNaomi2()); + } else { + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isNaomi2()); // XOR'ing (closed volume) + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Xor, param.isNaomi2()); + } + + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(param.isp.CullMode)]; + [encoder setStencilReferenceValue:2]; + MTLScissorRect scissorRect {}; + SetTileClip(encoder, param.tileclip, scissorRect); + // TODO inside clipping + + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart:param.first * 3 + vertexCount:param.count * 3]; + + if (mv_mode == 1 || mv_mode == 2) + { + // Sum the area + state = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? 
ModVolMode::Inclusion : ModVolMode::Exclusion, param.isNaomi2()); + depth_state = pipelineManager->GetModVolDepthStencilStates(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isNaomi2()); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(param.isp.CullMode)]; + [encoder setStencilReferenceValue:1]; + [encoder drawPrimitives:MTLPrimitiveTypeTriangle + vertexStart: mod_base * 3 + vertexCount: (param.first + param.count - mod_base) * 3]; + mod_base = -1; + } + } + [encoder setVertexBufferOffset:0 atIndex:30]; + + state = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, false); + depth_state = pipelineManager->GetModVolDepthStencilStates(ModVolMode::Final, false); + [encoder setRenderPipelineState:state]; + [encoder setDepthStencilState:depth_state]; + [encoder setCullMode:toMetalCullMode(0)]; + [encoder setStencilReferenceValue:0x81]; + [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangleStrip + indexCount:4 + indexType:MTLIndexTypeUInt32 + indexBuffer:curMainBuffer + indexBufferOffset:offsets.indexOffset]; + + [encoder popDebugGroup]; +} + +void MetalDrawer::UploadMainBuffer(const MetalVertexShaderUniforms &vertexUniforms, const MetalFragmentShaderUniforms &fragmentUniforms) { + MetalBufferPacker packer; + + // Vertex + packer.add(pvrrc.verts.data(), pvrrc.verts.size() * sizeof(decltype(*pvrrc.verts.data()))); + // Modifier Volumes + offsets.modVolOffset = packer.add(pvrrc.modtrig.data(), pvrrc.modtrig.size() * sizeof(decltype(*pvrrc.modtrig.data()))); + // Index + offsets.indexOffset = packer.add(pvrrc.idx.data(), pvrrc.idx.size() * sizeof(decltype(*pvrrc.idx.data()))); + // Uniform buffers + offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); + offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); + + std::vector n2uniforms; + if (settings.platform.isNaomi2()) + { + 
packNaomi2Uniforms(packer, offsets, n2uniforms, false); + offsets.lightsOffset = packNaomi2Lights(packer); + } + + MetalBufferData *buffer = new MetalBufferData(packer.size()); + packer.upload(*buffer); + curMainBuffer = buffer->buffer; +} + +bool MetalDrawer::Draw(const MetalTexture *fogTexture, const MetalTexture *paletteTexture) { + MetalFragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); + dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3; + if (dithering) { + switch (pvrrc.fb_W_CTRL.fb_packmode) + { + case 0: // 0555 KRGB 16 bit + case 3: // 1555 ARGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f; + break; + case 1: // 565 RGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f; + fragUniforms.ditherDivisor[1] = 4.f; + break; + case 2: // 4444 ARGB 16 bit + fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f; + break; + default: + break; + } + fragUniforms.ditherDivisor[3] = 1.f; + } + + currentScissor = MTLScissorRect {}; + + @autoreleasepool { + id renderEncoder = BeginRenderPass(); + [renderEncoder retain]; + + [renderEncoder setFragmentTexture:fogTexture->GetTexture() atIndex:2]; + [renderEncoder setFragmentTexture:paletteTexture->GetTexture() atIndex:3]; + + // Fog sampler + TSP fogTsp = {}; + fogTsp.FilterMode = 1; + fogTsp.ClampU = 1; + fogTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers->GetSampler(fogTsp) atIndex:2]; + + // Palette sampler + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + [renderEncoder setFragmentSamplerState:samplers->GetSampler(palTsp) atIndex:3]; + + setFirstProvokingVertex(pvrrc); + + // Upload vertex and index buffers + MetalVertexShaderUniforms vtxUniforms {}; + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); + + [renderEncoder 
setFrontFacingWinding:MTLWindingCounterClockwise]; + + UploadMainBuffer(vtxUniforms, fragUniforms); + + [renderEncoder setVertexBuffer:curMainBuffer offset:0 atIndex:30]; + [renderEncoder setVertexBuffer:curMainBuffer offset:offsets.vertexUniformOffset atIndex:0]; + [renderEncoder setFragmentBuffer:curMainBuffer offset:offsets.fragmentUniformOffset atIndex:0]; + + RenderPass previous_pass {}; + for (int render_pass = 0; render_pass < (int)pvrrc.render_passes.size(); render_pass++) { + const RenderPass& current_pass = pvrrc.render_passes[render_pass]; + + DEBUG_LOG(RENDERER, "Render pass %d OP %d PT %d TR %d MV %d autosort %d", render_pass + 1, + current_pass.op_count - previous_pass.op_count, + current_pass.pt_count - previous_pass.pt_count, + current_pass.tr_count - previous_pass.tr_count, + current_pass.mvo_count - previous_pass.mvo_count, current_pass.autosort); + DrawList(renderEncoder, ListType_Opaque, false, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(renderEncoder, ListType_Punch_Through, false, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawModVols(renderEncoder, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); + if (current_pass.autosort) { + if (!config::PerStripSorting) + DrawSorted(renderEncoder, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < (int)pvrrc.render_passes.size()); + else + DrawList(renderEncoder, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } else { + DrawList(renderEncoder, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } + previous_pass = current_pass; + } + } + + curMainBuffer = nil; + + return !pvrrc.isRTT; +} + +void MetalTextureDrawer::Init(MetalSamplers *samplers, MetalShaders *shaders, MetalTextureCache *textureCache) +{ + MetalDrawer::Init(samplers, 
MetalPipelineManager(shaders)); + + this->textureCache = textureCache; + + rttPassDescriptor = [[MTLRenderPassDescriptor alloc] init]; +} + +id MetalTextureDrawer::BeginRenderPass() { + DEBUG_LOG(RENDERER, "RenderToTexture packmode=%d stride=%d - %d x %d @ %06x", pvrrc.fb_W_CTRL.fb_packmode, pvrrc.fb_W_LINESTRIDE * 8, + pvrrc.fb_X_CLIP.max + 1, pvrrc.fb_Y_CLIP.max + 1, pvrrc.fb_W_SOF1 & VRAM_MASK); + matrices.CalcMatrices(&pvrrc); + + textureAddr = pvrrc.fb_W_SOF1 & VRAM_MASK; + u32 origWidth = pvrrc.getFramebufferWidth(); + u32 origHeight = pvrrc.getFramebufferHeight(); + u32 upscaledWidth = origWidth; + u32 upscaledHeight = origHeight; + u32 widthPow2; + u32 heightPow2; + getRenderToTextureDimensions(upscaledWidth, upscaledHeight, widthPow2, heightPow2); + + id commandBuffer = commandPool->Allocate(); + + if (!depthAttachment || widthPow2 > depthAttachment.width || heightPow2 > depthAttachment.height) + { + MTLTextureDescriptor *depthDescriptor = [[MTLTextureDescriptor alloc] init]; + depthDescriptor.width = widthPow2; + depthDescriptor.height = heightPow2; + depthDescriptor.pixelFormat = MTLPixelFormatDepth32Float_Stencil8; + depthDescriptor.usage = MTLTextureUsageRenderTarget; + depthDescriptor.storageMode = MTLStorageModePrivate; + + depthAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:depthDescriptor]; + [depthAttachment setLabel:@"Rtt Depth Attachment"]; + } + + id colorImage; + + if (!config::RenderToTextureBuffer) + { + texture = textureCache->getRTTexture(textureAddr, pvrrc.fb_W_CTRL.fb_packmode, origWidth, origHeight); + if (textureCache->IsInFlight(texture, false)) + { + texture->CreateReadOnlyCopy(commandBuffer); + texture->deferDeleteResource(commandPool); + } + textureCache->SetInFlight(texture); + + // Check if we need to recreate the texture + bool needsRecreation = !texture->GetTexture() || + texture->GetTexture().width != widthPow2 || + texture->GetTexture().height != heightPow2; + + if (needsRecreation) + { + 
MTLTextureDescriptor *colorDescriptor = [[MTLTextureDescriptor alloc] init]; + colorDescriptor.width = widthPow2; + colorDescriptor.height = heightPow2; + colorDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + colorDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + colorDescriptor.storageMode = MTLStorageModePrivate; + + id newTexture = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:colorDescriptor]; + [newTexture setLabel:@"Rtt Color Attachment"]; + texture->SetTexture(newTexture, widthPow2, heightPow2); + } + colorImage = texture->GetTexture(); + } + else + { + if (!colorAttachment || widthPow2 > colorAttachment.width || heightPow2 > colorAttachment.height) + { + MTLTextureDescriptor *colorDescriptor = [[MTLTextureDescriptor alloc] init]; + colorDescriptor.width = widthPow2; + colorDescriptor.height = heightPow2; + colorDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + colorDescriptor.usage = MTLTextureUsageRenderTarget; + colorDescriptor.storageMode = MTLStorageModePrivate; + + colorAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:colorDescriptor]; + [colorAttachment setLabel:@"Rtt Color Attachment"]; + } + colorImage = colorAttachment; + } + + auto colorAttachmentDesc = rttPassDescriptor.colorAttachments[0]; + [colorAttachmentDesc setTexture:colorImage]; + [colorAttachmentDesc setLoadAction:MTLLoadActionClear]; + [colorAttachmentDesc setStoreAction:MTLStoreActionStore]; + [colorAttachmentDesc setClearColor:MTLClearColorMake(0.0, 0.0, 0.0, 1.0)]; + + auto depthAttachmentDesc = rttPassDescriptor.depthAttachment; + [depthAttachmentDesc setTexture:depthAttachment]; + [depthAttachmentDesc setLoadAction:MTLLoadActionClear]; + [depthAttachmentDesc setStoreAction:MTLStoreActionDontCare]; + [depthAttachmentDesc setClearDepth:0.0]; + + auto stencilAttachmentDesc = rttPassDescriptor.stencilAttachment; + [stencilAttachmentDesc setTexture:depthAttachment]; + [stencilAttachmentDesc 
setLoadAction:MTLLoadActionClear]; + [stencilAttachmentDesc setStoreAction:MTLStoreActionDontCare]; + [stencilAttachmentDesc setClearStencil:0]; + + currentEncoder = [commandBuffer renderCommandEncoderWithDescriptor:rttPassDescriptor]; + [currentEncoder pushDebugGroup:@"RenderToTexture"]; + + MTLViewport viewport = { + 0.0, + 0.0, + (double)upscaledWidth, + (double)upscaledHeight, + 1.0, + 0.0 + }; + [currentEncoder setViewport:viewport]; + + u32 minX = pvrrc.getFramebufferMinX() * upscaledWidth / origWidth; + u32 minY = pvrrc.getFramebufferMinY() * upscaledHeight / origHeight; + getRenderToTextureDimensions(minX, minY, widthPow2, heightPow2); + baseScissor = MTLScissorRect { minX, minY, upscaledWidth, upscaledHeight }; + [currentEncoder setScissorRect:baseScissor]; + + return currentEncoder; +} + +void MetalTextureDrawer::EndRenderPass() +{ + [currentEncoder popDebugGroup]; + [currentEncoder endEncoding]; + currentEncoder = nil; + + u32 clippedWidth = pvrrc.getFramebufferWidth(); + u32 clippedHeight = pvrrc.getFramebufferHeight(); + + if (config::RenderToTextureBuffer) + { + commandPool->EndFrameAndWait(); + + u16 *dst = (u16 *)&vram[textureAddr]; + + PixelBuffer tmpBuf; + tmpBuf.init(clippedWidth, clippedHeight); + // TODO: WRITE TO BUFFER + WriteTextureToVRam(clippedWidth, clippedHeight, (u8 *)tmpBuf.data(), dst, pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8); + } + else + { + commandPool->EndFrame(); + texture->dirty = 0; + texture->unprotectVRam(); + } + + MetalDrawer::EndRenderPass(); +} + +void MetalScreenDrawer::Init(MetalSamplers *samplers, MetalShaders *shaders, const MTLViewport &viewport) { + emulateFramebuffer = config::EmulateFramebuffer; + this->shaderManager = shaders; + + if (this->viewport.height != viewport.height || + this->viewport.width != viewport.width || + this->viewport.originX != viewport.originX || + this->viewport.originY != viewport.originY || + this->viewport.zfar != viewport.zfar || + this->viewport.znear != viewport.znear) { + if 
(!framebuffers.empty()) { + verify(commandPool != nullptr); + commandPool->addToFlight(new MetalDeleter(std::move(framebuffers))); + } + if (depthAttachment) { + class ResourceDeleter : public MetalDeletable + { + public: + ResourceDeleter(id texture) + { + std::swap(this->texture, texture); + } + + ~ResourceDeleter() override { + [texture setPurgeableState:MTLPurgeableStateEmpty]; + texture = nil; + } + + private: + id texture = nil; + }; + + commandPool->addToFlight(new ResourceDeleter(depthAttachment)); + } + + depthAttachment = nil; + clearPassDescriptors.clear(); + loadPassDescriptors.clear(); + clearNeeded.clear(); + } + this->viewport = viewport; + + if (depthAttachment == nil) + { + MTLTextureDescriptor *descriptor = [[MTLTextureDescriptor alloc] init]; + descriptor.width = viewport.width; + descriptor.height = viewport.height; + descriptor.pixelFormat = MTLPixelFormatDepth32Float_Stencil8; + descriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + depthAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:descriptor]; + } + + if (framebuffers.size() > 3) + { + framebuffers.resize(3); + loadPassDescriptors.resize(3); + clearPassDescriptors.resize(3); + clearNeeded.resize(3); + } + else + { + while (framebuffers.size() < 3) + { + MTLTextureDescriptor *texDescriptor = [[MTLTextureDescriptor alloc] init]; + texDescriptor.width = viewport.width; + texDescriptor.height = viewport.height; + texDescriptor.pixelFormat = MTLPixelFormatRGBA8Unorm; + texDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + id colorAttachment = [MetalContext::Instance()->GetDevice() newTextureWithDescriptor:texDescriptor]; + framebuffers.push_back(colorAttachment); + + MTLRenderPassDescriptor *passDescriptor = [[MTLRenderPassDescriptor alloc] init]; + auto depth = passDescriptor.depthAttachment; + [depth setTexture:depthAttachment]; + [depth setLoadAction:MTLLoadActionClear]; + [depth 
setStoreAction:MTLStoreActionDontCare]; + + auto stencil = passDescriptor.stencilAttachment; + [stencil setTexture:depthAttachment]; + [stencil setLoadAction:MTLLoadActionClear]; + [stencil setStoreAction:MTLStoreActionDontCare]; + + auto color = passDescriptor.colorAttachments[0]; + [color setTexture:colorAttachment]; + [color setLoadAction:MTLLoadActionLoad]; + [color setStoreAction:MTLStoreActionStore]; + + loadPassDescriptors.push_back(passDescriptor); + + MTLRenderPassDescriptor *clearPassDescriptor = [passDescriptor copy]; + [clearPassDescriptor.colorAttachments[0] setLoadAction:MTLLoadActionClear]; + + clearPassDescriptors.push_back(clearPassDescriptor); + clearNeeded.push_back(true); + } + } + frameRendered = false; + + MetalDrawer::Init(samplers, MetalPipelineManager(shaderManager)); +} + +id MetalScreenDrawer::BeginRenderPass() { + if (!renderPassStarted) + { + frameRendered = false; + id commandBuffer = commandPool->Allocate(); + MTLRenderPassDescriptor* passDescriptor = clearNeeded[GetCurrentImage()] || pvrrc.clearFramebuffer ? 
clearPassDescriptors[GetCurrentImage()] : loadPassDescriptors[GetCurrentImage()]; + clearNeeded[GetCurrentImage()] = false; + currentEncoder = [commandBuffer renderCommandEncoderWithDescriptor:passDescriptor]; + renderPassStarted = true; + } + + [currentEncoder setViewport:viewport]; + + matrices.CalcMatrices(&pvrrc, viewport.width, viewport.height); + + SetBaseScissor(viewport); + [currentEncoder setScissorRect:baseScissor]; + + return currentEncoder; +} + +void MetalScreenDrawer::EndRenderPass() { + if (!renderPassStarted) + return; + + [currentEncoder endEncoding]; + currentEncoder = nil; + + if (emulateFramebuffer) + { + // TODO: scaleAndWriteFramebuffer + } + else + { + + aspectRatio = getOutputFramebufferAspectRatio(); + } + commandPool->EndFrame(); + MetalDrawer::EndRenderPass(); + frameRendered = true; +} \ No newline at end of file diff --git a/core/rend/metal/metal_driver.h b/core/rend/metal/metal_driver.h new file mode 100644 index 0000000000..65837653bd --- /dev/null +++ b/core/rend/metal/metal_driver.h @@ -0,0 +1,128 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once +#include "ui/imgui_driver.h" +#include "imgui_impl_metal.h" +#include "metal_context.h" +#include + +#include "metal_texture.h" + +class MetalDriver final : public ImGuiDriver { +public: + MetalDriver() { + ImGui_ImplMetal_Init(MetalContext::Instance()->GetDevice()); + } + + void reset() override + { + ImGuiDriver::reset(); + textures.clear(); + ImGui_ImplMetal_Shutdown(); + } + + void newFrame() override { + MetalContext *context = MetalContext::Instance(); + + // Use existing descriptor and encoder when available + if (context->GetDescriptor() != nullptr) { + ImGui_ImplMetal_NewFrame(MetalContext::Instance()->GetDescriptor()); + } else { + drawable = [context->GetLayer() nextDrawable]; + descriptor = [[MTLRenderPassDescriptor alloc] init]; + + [descriptor setDefaultRasterSampleCount:1]; + + auto color = [descriptor colorAttachments][0]; + [color setClearColor:MTLClearColorMake(0.f, 0.f, 0.f, 1.f)]; + [color setTexture:[drawable texture]]; + [color setLoadAction:MTLLoadActionClear]; + [color setStoreAction:MTLStoreActionStore]; + + ImGui_ImplMetal_NewFrame(descriptor); + } + } + + void renderDrawData(ImDrawData *drawData, bool gui_open) override { + MetalContext *context = MetalContext::Instance(); + + if (!context->IsValid()) + return; + + bool rendering = context->IsRendering(); + if (!rendering) + context->NewFrame(); + if (!rendering || newFrameStarted) + { + context->BeginRenderPass(); + if (renderer->RenderLastFrame()) + context->PresentLastFrame(); + } + + ImGui_ImplMetal_RenderDrawData(drawData, context->GetCommandBuffer(), context->GetEncoder()); + + if (!rendering || newFrameStarted) + context->EndFrame(); + newFrameStarted = false; + } + + void present() override { + MetalContext::Instance()->Present(); + } + + ImTextureID getTexture(const std::string &name) override { + auto it = textures.find(name); + if (it != textures.end()) + return (ImTextureID)(intptr_t)(__bridge void*)it->second.texture->GetTexture(); + + return 
ImTextureID{}; + } + + ImTextureID updateTexture(const std::string &name, const u8 *data, int width, int height, bool nearestSampling) override { + Texture texture(std::make_unique()); + texture.texture->tex_type = TextureType::_8888; + texture.texture->UploadToGPU(width, height, data, false); + + auto textureID = (ImTextureID)(intptr_t)(__bridge void*)texture.texture->GetTexture(); + + textures[name] = std::move(texture); + + return textureID; + } + + void deleteTexture(const std::string &name) override { + auto it = textures.find(name); + [it->second.texture->GetTexture() setPurgeableState:MTLPurgeableStateEmpty]; + textures.erase(name); + } + +private: + struct Texture { + Texture() = default; + Texture(std::unique_ptr&& texture) : texture(std::move(texture)) {} + + std::unique_ptr texture; + }; + + MTLRenderPassDescriptor* descriptor; + id drawable; + std::unordered_map textures; + bool newFrameStarted = false; +}; diff --git a/core/rend/metal/metal_pipeline.h b/core/rend/metal/metal_pipeline.h new file mode 100644 index 0000000000..9869d25c5b --- /dev/null +++ b/core/rend/metal/metal_pipeline.h @@ -0,0 +1,278 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once +#include "types.h" +#include +#include + +#include "cfg/option.h" +#include "hw/pvr/ta_ctx.h" +#include "metal_shaders.h" + +class MetalRenderer; + +enum class ModVolMode { Xor, Or, Inclusion, Exclusion, Final }; + +class MetalPipelineManager +{ +public: + explicit MetalPipelineManager(MetalShaders *shaderManager); + virtual ~MetalPipelineManager() = default; + + void term() + { + pipelines.clear(); + depthPassPipelines.clear(); + depthStencilStates.clear(); + depthPassDepthStencilStates.clear(); + } + + id GetBlitPassPipeline() { + if (blitPassPipeline != nullptr) + return blitPassPipeline; + + CreateBlitPassPipeline(); + + return blitPassPipeline; + } + + id GetDepthPassPipeline(bool naomi2) + { + u32 pipehash = hash(naomi2); + const auto &pipeline = depthPassPipelines.find(pipehash); + if (pipeline != depthPassPipelines.end() && pipeline->second != nullptr) + return pipeline->second; + CreateDepthPassPipeline(naomi2); + + return depthPassPipelines[pipehash]; + } + + id GetModifierVolumePipeline(ModVolMode mode, bool naomi2) + { + u32 pipehash = hash(mode, naomi2); + const auto &pipeline = modVolPipelines.find(pipehash); + if (pipeline != modVolPipelines.end() && pipeline->second != nullptr) + return pipeline->second; + CreateModVolPipeline(mode, naomi2); + + return modVolPipelines[pipehash]; + } + + id GetPipeline(u32 listType, bool sortTriangles, const PolyParam& pp, int gpuPalette, bool dithering) + { + u64 pipehash = hash(listType, sortTriangles, &pp, gpuPalette, dithering); + const auto &pipeline = pipelines.find(pipehash); + if (pipeline != pipelines.end() && pipeline->second != nullptr) + return pipeline->second; + CreatePipeline(listType, sortTriangles, pp, gpuPalette, dithering); + + return pipelines[pipehash]; + } + + id GetModVolDepthStencilStates(ModVolMode mode, bool naomi2) + { + u32 pipehash = hash(mode, naomi2); + const auto &state = modVolStencilStates.find(pipehash); + if (state != modVolStencilStates.end() && state->second 
!= nullptr) + return state->second; + CreateModVolDepthStencilState(mode, naomi2); + + return modVolStencilStates[pipehash]; + } + + id GetDepthPassDepthStencilStates(bool naomi2) + { + u32 pipehash = hash(naomi2); + const auto &state = depthPassDepthStencilStates.find(pipehash); + if (state != depthPassDepthStencilStates.end() && state->second != nullptr) + return state->second; + CreateDepthPassDepthStencilState(naomi2); + + return depthPassDepthStencilStates[pipehash]; + } + + id GetDepthStencilStates(u32 listType, bool sortTriangles, bool shadowed, const PolyParam& pp) + { + u64 pipehash = hash(listType, sortTriangles, shadowed, &pp); + + const auto &state = depthStencilStates.find(pipehash); + if (state != depthStencilStates.end() && state->second != nullptr) + return state->second; + CreateDepthStencilState(listType, sortTriangles, shadowed, pp); + + return depthStencilStates[pipehash]; + } + +private: + void CreateBlitPassPipeline(); + void CreateModVolPipeline(ModVolMode mode, bool naomi2); + void CreateDepthPassPipeline(bool naomi2); + void CreatePipeline(u32 listType, bool sortTriangles, const PolyParam& pp, int gpuPalette, bool dithering); + + void CreateModVolDepthStencilState(ModVolMode mode, bool naomi2); + void CreateDepthPassDepthStencilState(bool naomi2); + void CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam& pp); + + u64 hash(u32 listType, bool sortTriangles, const PolyParam *pp, int gpuPalette, bool dithering) const + { + u64 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3) + | (((pp->tileclip >> 28) == 3) << 4); + hash |= ((listType >> 1) << 5); + bool ignoreTexAlpha = pp->tsp.IgnoreTexA || pp->tcw.PixelFmt == Pixel565; + hash |= (pp->tsp.ShadInstr << 7) | (ignoreTexAlpha << 9) | (pp->tsp.UseAlpha << 10) + | (pp->tsp.ColorClamp << 11) | ((config::Fog ? 
pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14) + | (pp->tsp.DstInstr << 17); + hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); + hash |= ((u64)sortTriangles << 26) | ((u64)gpuPalette << 27) | ((u64)pp->isNaomi2() << 29); + hash |= (u64)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30; + hash |= (u64)(pp->tcw.PixelFmt == PixelBumpMap) << 31; + hash |= (u64)dithering << 32; + + return hash; + } + u64 hash(u32 listType, bool sortTriangles, bool shadowed, const PolyParam *pp) const + { + u64 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3) + | (((pp->tileclip >> 28) == 3) << 4); + hash |= ((listType >> 1) << 5); + bool ignoreTexAlpha = pp->tsp.IgnoreTexA || pp->tcw.PixelFmt == Pixel565; + hash |= (pp->tsp.ShadInstr << 7) | (ignoreTexAlpha << 9) | (pp->tsp.UseAlpha << 10) + | (pp->tsp.ColorClamp << 11) | ((config::Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14) + | (pp->tsp.DstInstr << 17); + hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); + hash |= ((u64)sortTriangles << 26) | ((u64)pp->isNaomi2() << 29); + hash |= (u64)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30; + hash |= (u64)(pp->tcw.PixelFmt == PixelBumpMap) << 31; + + return hash; + } + u32 hash(ModVolMode mode, bool naomi2) const + { + return ((int)mode << 2) | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6); + } + u32 hash(bool naomi2) const + { + return ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3); + } + + MTLVertexDescriptor* GetMainVertexInputDescriptor(bool full = true, bool naomi2 = false) const + { + MTLVertexDescriptor *vertexDesc = [[MTLVertexDescriptor alloc] init]; + + auto pos = [vertexDesc attributes][0]; // pos + [pos setFormat:MTLVertexFormatFloat3]; + [pos setOffset:offsetof(Vertex, 
x)]; + [pos setBufferIndex:30]; + + if (full) { + auto col = [vertexDesc attributes][1]; // base color + [col setFormat:MTLVertexFormatUChar4Normalized]; + [col setOffset:offsetof(Vertex, col)]; + [col setBufferIndex:30]; + + auto spc = [vertexDesc attributes][2]; // offset color + [spc setFormat:MTLVertexFormatUChar4Normalized]; + [spc setOffset:offsetof(Vertex, spc)]; + [spc setBufferIndex:30]; + + auto u = [vertexDesc attributes][3]; // tex coord + [u setFormat:MTLVertexFormatFloat2]; + [u setOffset:offsetof(Vertex, u)]; + [u setBufferIndex:30]; + + auto col1 = [vertexDesc attributes][4]; + [col1 setFormat:MTLVertexFormatUChar4Normalized]; + [col1 setOffset:offsetof(Vertex, col1)]; + [col1 setBufferIndex:30]; + + auto spc1 = [vertexDesc attributes][5]; + [spc1 setFormat:MTLVertexFormatUChar4Normalized]; + [spc1 setOffset:offsetof(Vertex, spc1)]; + [spc1 setBufferIndex:30]; + + auto u1 = [vertexDesc attributes][6]; // tex coord + [u1 setFormat:MTLVertexFormatFloat2]; + [u1 setOffset:offsetof(Vertex, u1)]; + [u1 setBufferIndex:30]; + + if (naomi2) { + auto nx = [vertexDesc attributes][7]; // naomi2 normal + [nx setFormat:MTLVertexFormatFloat3]; + [nx setOffset:offsetof(Vertex, nx)]; + [nx setBufferIndex:30]; + } + } + + auto layout = [vertexDesc layouts][30]; + [layout setStride:sizeof(Vertex)]; + [layout setStepRate:1]; + [layout setStepFunction:MTLVertexStepFunctionPerVertex]; + + return vertexDesc; + } + + static inline MTLBlendFactor GetBlendFactor(u32 instr, bool src) { + switch (instr) { + case 0: // zero + return MTLBlendFactorZero; + case 1: // one + return MTLBlendFactorOne; + case 2: // other color + return src ? MTLBlendFactorDestinationColor : MTLBlendFactorSourceColor; + case 3: // inverse other color + return src ? 
MTLBlendFactorOneMinusDestinationColor : MTLBlendFactorOneMinusSourceColor; + case 4: // src alpha + return MTLBlendFactorSourceAlpha; + case 5: // inverse src alpha + return MTLBlendFactorOneMinusSourceAlpha; + case 6: // dst alpha + return MTLBlendFactorDestinationAlpha; + case 7: // inverse dst alpha + return MTLBlendFactorOneMinusDestinationAlpha; + default: + die("Unsupported blend instruction"); + return MTLBlendFactorZero; + } + } + + id blitPassPipeline = nil; + std::map> pipelines; + std::map> modVolPipelines; + std::map> depthPassPipelines; + + std::map> modVolStencilStates; + std::map> depthStencilStates; + std::map> depthPassDepthStencilStates; + +protected: + MetalShaders *shaderManager; +}; + +static const MTLCompareFunction depthOps[] = +{ + MTLCompareFunctionNever, // 0 Never + MTLCompareFunctionLess, // 1 Less + MTLCompareFunctionEqual, // 2 Equal + MTLCompareFunctionLessEqual, // 3 Less Or Equal + MTLCompareFunctionGreater, // 4 Greater + MTLCompareFunctionNotEqual, // 5 Not Equal + MTLCompareFunctionGreaterEqual, // 6 Greater Or Equal + MTLCompareFunctionAlways, // 7 Always +}; \ No newline at end of file diff --git a/core/rend/metal/metal_pipeline.mm b/core/rend/metal/metal_pipeline.mm new file mode 100644 index 0000000000..db1ed72371 --- /dev/null +++ b/core/rend/metal/metal_pipeline.mm @@ -0,0 +1,307 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. 
If not, see . +*/ + +#include "metal_pipeline.h" + +#include "metal_context.h" +#include "metal_renderer.h" + +MetalPipelineManager::MetalPipelineManager(MetalShaders *shaderManager) { + this->shaderManager = shaderManager; +} + +void MetalPipelineManager::CreateBlitPassPipeline() { + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Blit Pass"]; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + [descriptor setVertexFunction:shaderManager->GetBlitVertexShader()]; + [descriptor setFragmentFunction:shaderManager->GetBlitFragmentShader()]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nil) { + ERROR_LOG(RENDERER, "Failed to create Blit Pipeline State: %s", [[error localizedDescription] UTF8String]);; + } + + blitPassPipeline = state; +} + +void MetalPipelineManager::CreateModVolPipeline(ModVolMode mode, bool naomi2) { + MTLVertexDescriptor *vertexDesc = nil; + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + + [descriptor setLabel:@"Mod Vol Pass"]; + + if (mode == ModVolMode::Final) { + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(false, naomi2)]; + } + else { + vertexDesc = [[MTLVertexDescriptor alloc] init]; + + auto layout = [vertexDesc layouts][30]; + [layout setStride:sizeof(float) * 3]; + + auto attribute = [vertexDesc attributes][0]; + [attribute setOffset:0]; + [attribute setBufferIndex:30]; + [attribute setFormat:MTLVertexFormatFloat3]; + + [descriptor setVertexDescriptor:vertexDesc]; + } + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:mode == ModVolMode::Final]; + [attachment setSourceRGBBlendFactor:MTLBlendFactorSourceAlpha]; + [attachment setDestinationRGBBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [attachment 
setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:MTLBlendFactorSourceAlpha]; + [attachment setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha]; + [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:mode != ModVolMode::Final ? MTLColorWriteMaskNone : MTLColorWriteMaskAll]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + + MetalModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + [descriptor setVertexFunction:shaderManager->GetModVolVertexShader(shaderParams)]; + [descriptor setFragmentFunction:shaderManager->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nullptr) { + ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + modVolPipelines[hash(mode, naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthPassPipeline(bool naomi2) +{ + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Depth Pass"]; + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(false, false)]; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:false]; + [attachment setSourceRGBBlendFactor:MTLBlendFactorZero]; + [attachment setDestinationRGBBlendFactor:MTLBlendFactorZero]; + [attachment setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:MTLBlendFactorZero]; + [attachment setDestinationAlphaBlendFactor:MTLBlendFactorZero]; + [attachment 
setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:MTLColorWriteMaskNone]; + + MetalModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + [descriptor setVertexFunction:shaderManager->GetModVolVertexShader(shaderParams)]; + [descriptor setFragmentFunction:shaderManager->GetModVolFragmentShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation)];; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nil) { + ERROR_LOG(RENDERER, "Failed to create Depth Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + depthPassPipelines[hash(naomi2)] = state; +} + +void MetalPipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const PolyParam &pp, int gpuPalette, bool dithering) { + MTLRenderPipelineDescriptor *descriptor = [[MTLRenderPipelineDescriptor alloc] init]; + [descriptor setLabel:@"Main Draw"]; + [descriptor setVertexDescriptor:GetMainVertexInputDescriptor(true, pp.isNaomi2())]; + + u32 src = pp.tsp.SrcInstr; + u32 dst = pp.tsp.DstInstr; + + auto attachment = [descriptor colorAttachments][0]; + [attachment setBlendingEnabled:true]; + [attachment setSourceRGBBlendFactor:GetBlendFactor(src, true)]; + [attachment setDestinationRGBBlendFactor:GetBlendFactor(dst, false)]; + [attachment setRgbBlendOperation:MTLBlendOperationAdd]; + [attachment setSourceAlphaBlendFactor:GetBlendFactor(src, true)]; + [attachment setDestinationAlphaBlendFactor:GetBlendFactor(dst, false)]; + [attachment setAlphaBlendOperation:MTLBlendOperationAdd]; + [attachment setWriteMask:MTLColorWriteMaskAll]; + [attachment setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + [descriptor setDepthAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + [descriptor setStencilAttachmentPixelFormat:MTLPixelFormatDepth32Float_Stencil8]; + + bool divPosZ = 
!settings.platform.isNaomi2() && config::NativeDepthInterpolation; + + MetalVertexShaderParams vertParams = {}; + vertParams.gouraud = pp.pcw.Gouraud == 1; + vertParams.naomi2 = pp.isNaomi2(); + vertParams.divPosZ = divPosZ; + + MetalFragmentShaderParams fragParams = {}; + fragParams.alphaTest = listType == ListType_Punch_Through; + fragParams.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; + fragParams.clamping = pp.tsp.ColorClamp; + fragParams.insideClipTest = (pp.tileclip >> 28) == 3; + fragParams.fog = config::Fog ? pp.tsp.FogCtrl : 2; + fragParams.gouraud = pp.pcw.Gouraud; + fragParams.ignoreTexAlpha = pp.tsp.IgnoreTexA || pp.tcw.PixelFmt == Pixel565; + fragParams.offset = pp.pcw.Offset; + fragParams.shaderInstr = pp.tsp.ShadInstr; + fragParams.texture = pp.pcw.Texture; + fragParams.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1; + fragParams.useAlpha = pp.tsp.UseAlpha; + fragParams.palette = gpuPalette; + fragParams.divPosZ = divPosZ; + fragParams.dithering = dithering; + + [descriptor setVertexFunction:shaderManager->GetVertexShader(vertParams)]; + [descriptor setFragmentFunction:shaderManager->GetFragmentShader(fragParams)]; + + NSError *error = nil; + auto state = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:descriptor error:&error]; + + if (state == nullptr) { + ERROR_LOG(RENDERER, "Failed to create Render Pipeline State: %s", [[error localizedDescription] UTF8String]); + } + + pipelines[hash(listType, sortTriangles, &pp, gpuPalette, dithering)] = state; +} + +void MetalPipelineManager::CreateModVolDepthStencilState(ModVolMode mode, bool naomi2) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + [descriptor setDepthWriteEnabled:false]; + [descriptor setDepthCompareFunction:mode == ModVolMode::Xor || mode == ModVolMode::Or ? 
MTLCompareFunctionGreater : MTLCompareFunctionAlways]; + + MTLStencilDescriptor *stencilDescriptor = [[MTLStencilDescriptor alloc] init]; + switch (mode) + { + case ModVolMode::Xor: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationInvert]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:0]; + [stencilDescriptor setWriteMask:2]; + break; + case ModVolMode::Or: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:2]; + [stencilDescriptor setWriteMask:2]; + break; + case ModVolMode::Inclusion: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionLessEqual]; + [stencilDescriptor setReadMask:3]; + [stencilDescriptor setWriteMask:3]; + break; + case ModVolMode::Exclusion: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionEqual]; + [stencilDescriptor setReadMask:3]; + [stencilDescriptor setWriteMask:3]; + break; + case ModVolMode::Final: + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationZero]; + [stencilDescriptor 
setDepthFailureOperation:MTLStencilOperationZero]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionEqual]; + [stencilDescriptor setReadMask:0x81]; + [stencilDescriptor setWriteMask:3]; + break; + } + + [descriptor setFrontFaceStencil:stencilDescriptor]; + [descriptor setBackFaceStencil:stencilDescriptor]; + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + modVolStencilStates[hash(mode, naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthPassDepthStencilState(bool naomi2) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + [descriptor setLabel:@"Sorted Depth Pass"]; + [descriptor setDepthWriteEnabled:true]; + [descriptor setDepthCompareFunction:MTLCompareFunctionGreaterEqual]; + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + depthPassDepthStencilStates[hash(naomi2)] = state; +} + +void MetalPipelineManager::CreateDepthStencilState(u32 listType, bool sortTriangles, bool shadowed, const PolyParam &pp) { + MTLDepthStencilDescriptor *descriptor = [[MTLDepthStencilDescriptor alloc] init]; + if (shadowed) + [descriptor setLabel:@"Main Shadowed Depth-Stencil State"]; + else + [descriptor setLabel:@"Main Depth-Stencil State"]; + + MTLCompareFunction compareFunction; + if (listType == ListType_Punch_Through || sortTriangles) { + compareFunction = MTLCompareFunctionGreaterEqual; + } else { + compareFunction = depthOps[pp.isp.DepthMode]; + } + + bool depthWriteEnabled; + if (sortTriangles) { + depthWriteEnabled = false; + } else { + // Z Write Disable seems to be ignored for punch-through. 
+ // Fixes Worms World Party, Bust-a-Move 4 and Re-Volt + if (listType == ListType_Punch_Through) { + depthWriteEnabled = true; + } else { + depthWriteEnabled = !pp.isp.ZWriteDis; + } + } + + MTLStencilDescriptor *stencilDescriptor = [[MTLStencilDescriptor alloc] init]; + [stencilDescriptor setStencilFailureOperation:MTLStencilOperationKeep]; + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationKeep]; + + if (shadowed) { + [stencilDescriptor setDepthStencilPassOperation:MTLStencilOperationReplace]; + [stencilDescriptor setStencilCompareFunction:MTLCompareFunctionAlways]; + [stencilDescriptor setReadMask:0]; + [stencilDescriptor setWriteMask:0x80]; + } + + [descriptor setDepthCompareFunction:compareFunction]; + [descriptor setDepthWriteEnabled:depthWriteEnabled]; + + if (shadowed) { + [descriptor setBackFaceStencil:stencilDescriptor]; + [descriptor setFrontFaceStencil:stencilDescriptor]; + } + + auto state = [MetalContext::Instance()->GetDevice() newDepthStencilStateWithDescriptor:descriptor]; + + depthStencilStates[hash(listType, sortTriangles, shadowed, &pp)] = state; +} diff --git a/core/rend/metal/metal_quad.h b/core/rend/metal/metal_quad.h new file mode 100644 index 0000000000..c831d9cdc4 --- /dev/null +++ b/core/rend/metal/metal_quad.h @@ -0,0 +1,115 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ + +#pragma once + +#include +#include "metal_shaders.h" +#include "metal_buffer.h" + +struct MetalQuadVertex +{ + float x, y, z; + float u, v; +}; + +class MetalQuadBuffer +{ +public: + MetalQuadBuffer() + { + buffer = std::make_unique(sizeof(MetalQuadVertex) * 4); + } + + void Bind(id commandEncoder) + { + [commandEncoder setVertexBuffer:buffer->buffer offset:0 atIndex:0]; + } + + void Draw(id commandEncoder) + { + [commandEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + } + + void Update(MetalQuadVertex vertices[4] = nullptr) + { + if (vertices == nullptr) + { + static MetalQuadVertex defaultVtx[4] + { + { -1.f, -1.f, 0.f, 0.f, 1.f }, + { 1.f, -1.f, 0.f, 1.f, 1.f }, + { -1.f, 1.f, 0.f, 0.f, 0.f }, + { 1.f, 1.f, 0.f, 1.f, 0.f }, + }; + vertices = defaultVtx; + }; + + memcpy([buffer->buffer contents], vertices, sizeof(MetalQuadVertex) * 4); + } +private: + std::unique_ptr buffer; +}; + +class MetalQuadPipeline +{ +public: + MetalQuadPipeline(bool ignoreTexAlpha, bool rotate = false) + : rotate(rotate), ignoreTexAlpha(ignoreTexAlpha) {} + void Init(MetalShaders *shaderManager); + void Term() { + linearSampler = nil; + nearestSampler = nil; + } + void BindPipeline(id commandEncoder) { [commandEncoder setRenderPipelineState:GetPipeline()]; } + + id GetLinearSampler() { return linearSampler; } + id GetNearestSampler() { return nearestSampler; } +private: + id GetPipeline() { + if (!pipeline) + CreatePipeline(); + return pipeline; + } + void CreatePipeline(); + + + id pipeline; + id linearSampler; + id nearestSampler; + MetalShaders *shaderManager = nullptr; + bool rotate; + bool ignoreTexAlpha; +}; + +class MetalQuadDrawer +{ +public: + MetalQuadDrawer() = default; + MetalQuadDrawer(MetalQuadDrawer &&) = default; + MetalQuadDrawer(const MetalQuadDrawer &) = delete; + MetalQuadDrawer& operator=(MetalQuadDrawer &&) = default; + MetalQuadDrawer& operator=(const MetalQuadDrawer &) = delete; + + void Init(MetalQuadPipeline *pipeline); 
+ void Draw(id commandEncoder, id texture, MetalQuadVertex vertices[4] = nullptr, bool nearestFilter = false, const float *color = nullptr); +private: + MetalQuadPipeline *pipeline = nullptr; + std::unique_ptr buffer; +}; diff --git a/core/rend/metal/metal_quad.mm b/core/rend/metal/metal_quad.mm new file mode 100644 index 0000000000..797d7cadac --- /dev/null +++ b/core/rend/metal/metal_quad.mm @@ -0,0 +1,119 @@ +/* + Copyright 2025 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/
+
+#include "metal_quad.h"
+#import "metal_context.h"
+
+// Builds the render pipeline state used to draw textured quads.
+// The vertex and fragment functions are requested from shaderManager using the
+// current `rotate` and `ignoreTexAlpha` values. On failure the error is logged
+// and `pipeline` is left nil.
+void MetalQuadPipeline::CreatePipeline()
+{
+    MTLRenderPipelineDescriptor *pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
+    [pipelineDescriptor setVertexFunction:shaderManager->GetQuadVertexShader(rotate)];
+    [pipelineDescriptor setFragmentFunction:shaderManager->GetQuadFragmentShader(ignoreTexAlpha)];
+
+    [pipelineDescriptor setInputPrimitiveTopology:MTLPrimitiveTopologyClassTriangle];
+
+    // Color attachment 0: blending enabled, src * srcAlpha + dst * (1 - srcAlpha)
+    // for both the RGB and the alpha channels.
+    auto color = pipelineDescriptor.colorAttachments[0];
+    [color setBlendingEnabled:TRUE];
+    [color setSourceRGBBlendFactor:MTLBlendFactorSourceAlpha];
+    [color setDestinationRGBBlendFactor:MTLBlendFactorOneMinusSourceAlpha];
+    [color setRgbBlendOperation:MTLBlendOperationAdd];
+    [color setSourceAlphaBlendFactor:MTLBlendFactorSourceAlpha];
+    [color setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha];
+    [color setAlphaBlendOperation:MTLBlendOperationAdd];
+    [color setWriteMask:MTLColorWriteMaskAll];
+    // NOTE(review): the attachment format is hard-coded; confirm it matches every
+    // render target this pipeline is used with.
+    [color setPixelFormat:MTLPixelFormatRGBA8Unorm];
+
+    // Vertex layout: one interleaved buffer (index 0) of MetalQuadVertex,
+    // attribute 0 = float3 starting at member x, attribute 1 = float2 starting at member u.
+    MTLVertexDescriptor *vertexDescriptor = [[MTLVertexDescriptor alloc] init];
+    auto pos = vertexDescriptor.attributes[0];
+    [pos setFormat:MTLVertexFormatFloat3];
+    [pos setBufferIndex:0];
+    [pos setOffset:offsetof(MetalQuadVertex, x)];
+
+    auto uv = vertexDescriptor.attributes[1];
+    [uv setFormat:MTLVertexFormatFloat2];
+    [uv setBufferIndex:0];
+    [uv setOffset:offsetof(MetalQuadVertex, u)];
+
+    auto layout = vertexDescriptor.layouts[0];
+    [layout setStride:sizeof(MetalQuadVertex)];
+
+    [pipelineDescriptor setVertexDescriptor:vertexDescriptor];
+
+    NSError *error = nil;
+    pipeline = [MetalContext::Instance()->GetDevice() newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error];
+
+    if (pipeline == nil)
+    {
+        ERROR_LOG(RENDERER, "Failed to create quad pipeline: %s", [[error localizedDescription] UTF8String]);
+    }
+}
+
+// Stores the shader manager and creates the two samplers (linear and nearest)
+// the first time it is called. Both samplers clamp to edge on all three axes.
+// A sampler that already exists (non-nil) is kept as is.
+void MetalQuadPipeline::Init(MetalShaders *shaderManager)
+{
+    this->shaderManager = shaderManager;
+    if (linearSampler == nil)
+    {
+        MTLSamplerDescriptor *samplerDescriptor = [[MTLSamplerDescriptor alloc] init];
+        [samplerDescriptor setMinFilter:MTLSamplerMinMagFilterLinear];
+        [samplerDescriptor setMagFilter:MTLSamplerMinMagFilterLinear];
+        [samplerDescriptor setMipFilter:MTLSamplerMipFilterLinear];
+        [samplerDescriptor setSAddressMode:MTLSamplerAddressModeClampToEdge];
+        [samplerDescriptor setTAddressMode:MTLSamplerAddressModeClampToEdge];
+        [samplerDescriptor setRAddressMode:MTLSamplerAddressModeClampToEdge];
+        linearSampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:samplerDescriptor];
+    }
+    if (nearestSampler == nil)
+    {
+        MTLSamplerDescriptor *samplerDescriptor = [[MTLSamplerDescriptor alloc] init];
+        [samplerDescriptor setMinFilter:MTLSamplerMinMagFilterNearest];
+        [samplerDescriptor setMagFilter:MTLSamplerMinMagFilterNearest];
+        [samplerDescriptor setMipFilter:MTLSamplerMipFilterNearest];
+        [samplerDescriptor setSAddressMode:MTLSamplerAddressModeClampToEdge];
+        [samplerDescriptor setTAddressMode:MTLSamplerAddressModeClampToEdge];
+        [samplerDescriptor setRAddressMode:MTLSamplerAddressModeClampToEdge];
+        nearestSampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:samplerDescriptor];
+    }
+}
+
+// Remembers the pipeline to draw with and allocates a fresh vertex buffer object.
+void MetalQuadDrawer::Init(MetalQuadPipeline *pipeline) {
+    this->pipeline = pipeline;
+    buffer = std::make_unique();
+}
+
+// Encodes the drawing of one quad.
+//   commandEncoder  encoder receiving the pipeline, buffer, texture and draw commands
+//   texture         texture bound at fragment index 0; nothing is bound when nil
+//   vertices        vertex data uploaded into the drawer's buffer before drawing
+//   nearestFilter   selects the nearest sampler instead of the linear one
+//   color           4 floats passed to the fragment stage; nullptr means opaque white
+void MetalQuadDrawer::Draw(id commandEncoder, id texture, MetalQuadVertex *vertices, bool nearestFilter, const float *color)
+{
+    pipeline->BindPipeline(commandEncoder);
+    buffer->Update(vertices);
+    buffer->Bind(commandEncoder);
+
+    if (texture != nil)
+    {
+        [commandEncoder setFragmentTexture:texture atIndex:0];
+        [commandEncoder setFragmentSamplerState:nearestFilter ? pipeline->GetNearestSampler() : pipeline->GetLinearSampler() atIndex:0];
+    }
+
+    if (color == nullptr)
+    {
+        static float fullWhite[] { 1.f, 1.f, 1.f, 1.f };
+        color = fullWhite;
+    }
+
+    // The color is sent inline as 16 bytes at fragment buffer index 0.
+    [commandEncoder setFragmentBytes:color length:sizeof(float) * 4 atIndex:0];
+    buffer->Draw(commandEncoder);
+}
\ No newline at end of file
diff --git a/core/rend/metal/metal_renderer.h b/core/rend/metal/metal_renderer.h
new file mode 100644
index 0000000000..3673a3aa27
--- /dev/null
+++ b/core/rend/metal/metal_renderer.h
@@ -0,0 +1,76 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see .
+*/
+
+#pragma once
+#include "metal_pipeline.h"
+#include "metal_shaders.h"
+#include "metal_texture.h"
+#include "metal_buffer.h"
+#include "metal_drawer.h"
+#include "metal_commandpool.h"
+
+#include "hw/pvr/Renderer_if.h"
+#include "rend/tileclip.h"
+#include "rend/transform_matrix.h"
+
+// Common base of the Metal renderers: owns the texture cache, the fog and
+// palette textures, the emulated-framebuffer textures and the command pools
+// used to upload them.
+// NOTE(review): this header uses MetalContext but does not include
+// metal_context.h itself; presumably it arrives through one of the includes
+// above - confirm.
+class BaseMetalRenderer : public Renderer
+{
+protected:
+    // Initializes the two command pools; to be called from the subclass Init().
+    bool BaseInit();
+
+public:
+    void Term() override;
+    // Returns the cached texture for tsp/tcw, or nullptr if its update failed.
+    BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override;
+    void Process(TA_context* ctx) override;
+    void ReInitOSD();
+    // Uploads the emulated framebuffer described by info into the next framebuffer texture.
+    void RenderFramebuffer(const FramebufferInfo& info) override;
+    void WaitIdle();
+
+    bool RenderLastFrame() override {
+        return !clearLastFrame;
+    }
+
+    // Forwards to the Metal context.
+    bool GetLastFrame(std::vector& data, int& width, int& height) override {
+        return MetalContext::Instance()->GetLastFrame(data, width, height);
+    }
+
+protected:
+    // Records the new output size in `viewport`.
+    virtual void resize(int w, int h)
+    {
+        viewport.width = w;
+        viewport.height = h;
+    }
+
+    void CheckFogTexture();
+    void CheckPaletteTexture();
+    // Presents the current framebuffer texture; returns false when there is none.
+    bool presentFramebuffer();
+
+    MetalShaders shaderManager;
+    std::unique_ptr fogTexture;
+    std::unique_ptr paletteTexture;
+    MetalCommandPool texCommandPool;    // command buffers for texture, fog and palette uploads
+    MetalCommandPool fbCommandPool;     // command buffers for emulated framebuffer uploads
+    id texCommandBuffer = nil;          // only set for the duration of Process()
+    std::vector> framebufferTextures;
+    int framebufferTexIndex = 0;        // index of the framebuffer texture last written
+    MetalTextureCache textureCache;
+    MTLViewport viewport = MTLViewport { 0, 0, 640, 480, 0, 0 };
+    bool framebufferRendered = false;   // set by RenderFramebuffer(), reset by Process() for non-RTT contexts
+};
+
diff --git a/core/rend/metal/metal_renderer.mm b/core/rend/metal/metal_renderer.mm
new file mode 100644
index 0000000000..c30ccf89c7
--- /dev/null
+++ b/core/rend/metal/metal_renderer.mm
@@ -0,0 +1,301 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see .
+*/
+
+#include
+#include
+
+#include "metal_renderer.h"
+#include "hw/aica/dsp.h"
+#include "hw/pvr/ta.h"
+
+// Initializes the texture and framebuffer command pools. Always returns true.
+bool BaseMetalRenderer::BaseInit()
+{
+    texCommandPool.Init();
+    fbCommandPool.Init();
+
+    return true;
+}
+
+// Releases everything owned by the base renderer: presents a nil frame,
+// terminates both command pools, empties the texture cache, drops the fog,
+// palette and framebuffer textures and terminates the shader manager.
+void BaseMetalRenderer::Term()
+{
+    WaitIdle();
+    MetalContext::Instance()->PresentFrame(nil, MTLViewport {}, 0);
+    texCommandPool.Term();
+    fbCommandPool.Term();
+    textureCache.Clear();
+    // These are std::unique_ptr members: use reset() rather than the Objective-C nil
+    fogTexture.reset();
+    paletteTexture.reset();
+    framebufferTextures.clear();
+    framebufferTexIndex = 0;
+    shaderManager.term();
+}
+
+// Fetches the cache entry for tsp/tcw and, if it needs an update, uploads it
+// using the current texture command buffer. The entry is then flagged as in
+// flight. Returns nullptr when the update fails.
+BaseTextureCacheData *BaseMetalRenderer::GetTexture(TSP tsp, TCW tcw)
+{
+    MetalTexture* tf = textureCache.getTextureCacheData(tsp, tcw);
+
+    if (tf->NeedsUpdate()) {
+        tf->SetCommandBuffer(texCommandBuffer);
+
+        if (!tf->Update())
+        {
+            tf->SetCommandBuffer(nil);
+            return nullptr;
+        }
+    }
+    else if (tf->IsCustomTextureAvailable()) {
+        // TODO
+        tf->SetCommandBuffer(texCommandBuffer);
+    }
+    // The texture must not keep a reference to the per-frame command buffer
+    tf->SetCommandBuffer(nil);
+    textureCache.SetInFlight(tf);
+
+    return tf;
+}
+
+// Prepares a TA context for rendering: resets the per-frame flags, clears the
+// texture cache if requested, starts a new frame on the texture command pool,
+// parses the context and refreshes the fog and palette textures.
+// texCommandBuffer is only valid for the duration of this call.
+void BaseMetalRenderer::Process(TA_context *ctx)
+{
+    if (!ctx->rend.isRTT) {
+        framebufferRendered = false;
+        if (!config::EmulateFramebuffer)
+            clearLastFrame = false;
+    }
+    if (resetTextureCache) {
+        textureCache.Clear();
+        resetTextureCache = false;
+    }
+
+    texCommandPool.BeginFrame();
+    textureCache.SetCurrentIndex(texCommandPool.GetIndex());
+    textureCache.Cleanup();
+
+    texCommandBuffer = texCommandPool.Allocate();
+
+    ta_parse(ctx, true);
+
+    // TODO can't update fog or palette twice in multi render
+    CheckFogTexture();
+    CheckPaletteTexture();
+    texCommandBuffer = nil;
+}
+
+// Re-initializes both command pools.
+// NOTE(review): the pools are not terminated first; confirm that
+// MetalCommandPool::Init() can safely be called on an initialized pool.
+void BaseMetalRenderer::ReInitOSD()
+{
+    texCommandPool.Init();
+    fbCommandPool.Init();
+}
+
+// Uploads the emulated framebuffer into the next of three rotating textures.
+// When video output is disabled or blanked, a single pixel of the border color
+// is uploaded instead.
+void BaseMetalRenderer::RenderFramebuffer(const FramebufferInfo &info)
+{
+    framebufferTexIndex = (framebufferTexIndex + 1) % 3;
+
+    if (framebufferTextures.size() != 3)
+        framebufferTextures.resize(3);
+
+    std::unique_ptr& curTexture = framebufferTextures[framebufferTexIndex];
+    if (!curTexture)
+    {
+        curTexture = std::make_unique();
+        curTexture->tex_type = TextureType::_8888;
+    }
+
+    fbCommandPool.BeginFrame();
+    id fbCommandBuffer = fbCommandPool.Allocate();
+
+    curTexture->SetCommandBuffer(fbCommandBuffer);
+    if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1)
+    {
+        // Video output disabled
+        u8 rgba[]{ (u8)info.vo_border_col._red, (u8)info.vo_border_col._green, (u8)info.vo_border_col._blue, 255 };
+        curTexture->UploadToGPU(1, 1, rgba, false);
+    }
+    else
+    {
+        PixelBuffer pb;
+        int width;
+        int height;
+        ReadFramebuffer(info, pb, width, height);
+
+        curTexture->UploadToGPU(width, height, (u8*)pb.data(), false);
+    }
+    curTexture->SetCommandBuffer(nil);
+    fbCommandBuffer = nil;
+    fbCommandPool.EndFrame();
+    framebufferRendered = true;
+    clearLastFrame = false;
+}
+
+// Currently a no-op: the synchronization code below is commented out.
+void BaseMetalRenderer::WaitIdle()
+{
+//    [commandBuffer waitUntilCompleted];
+//    commandBuffer = nil;
+}
+
+// Creates the fog texture on first use and re-uploads the fog table when
+// updateFogTable is set and fog is enabled in the configuration.
+void BaseMetalRenderer::CheckFogTexture() {
+    if (!fogTexture)
+    {
+        fogTexture = std::make_unique();
+        fogTexture->tex_type = TextureType::_8;
+        updateFogTable = true;
+    }
+    if (!updateFogTable || !config::Fog)
+        return;
+    updateFogTable = false;
+    u8 texData[256];
+    MakeFogTexture(texData);
+
+    // 256 bytes uploaded as a 128x2 8-bit texture
+    fogTexture->SetCommandBuffer(texCommandBuffer);
+    fogTexture->UploadToGPU(128, 2, texData, false);
+    fogTexture->SetCommandBuffer(nil);
+}
+
+// Creates the palette texture on first use (always followed by an upload) and
+// re-uploads palette32_ram as a 1024x1 texture whenever updatePalette is set.
+void BaseMetalRenderer::CheckPaletteTexture() {
+    if (!paletteTexture)
+    {
+        paletteTexture = std::make_unique();
+        paletteTexture->tex_type = TextureType::_8888;
+    }
+    else if (!updatePalette)
+        return;
+    updatePalette = false;
+
+    paletteTexture->SetCommandBuffer(texCommandBuffer);
+    paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false);
+    paletteTexture->SetCommandBuffer(nil);
+}
+
+// Presents the framebuffer texture last written by RenderFramebuffer().
+// Returns false if no such texture exists yet.
+bool BaseMetalRenderer::presentFramebuffer()
+{
+    if (framebufferTexIndex >= (int)framebufferTextures.size())
+        return false;
+    MetalTexture *fbTexture = framebufferTextures[framebufferTexIndex].get();
+    if (fbTexture == nullptr)
+        return false;
+
+    // Named fbViewport so that it doesn't shadow the `viewport` member.
+    // NOTE(review): znear is 1.0 and zfar is 0 here - confirm this is intended.
+    MTLViewport fbViewport = { 0, 0, (float)fbTexture->GetTexture().width, (float)fbTexture->GetTexture().height, 1.0, 0 };
+
+    MetalContext::Instance()->PresentFrame(fbTexture->GetTexture(), fbViewport,
+            getDCFramebufferAspectRatio());
+    return true;
+}
+
+// Concrete Metal renderer: draws to the screen through screenDrawer and to
+// render-to-texture targets through textureDrawer.
+class MetalRenderer final : public BaseMetalRenderer
+{
+public:
+    // Sets up both drawers and the base renderer. Returns false if the base
+    // initialization fails.
+    bool Init() override
+    {
+        NOTICE_LOG(RENDERER, "MetalRenderer::Init");
+
+        textureDrawer.Init(&samplerManager, &shaderManager, &textureCache);
+        textureDrawer.SetCommandPool(&texCommandPool);
+
+        screenDrawer.Init(&samplerManager, &shaderManager, viewport);
+        screenDrawer.SetCommandPool(&texCommandPool);
+        if (!BaseInit())
+            return false;
+        emulateFramebuffer = config::EmulateFramebuffer;
+
+        return true;
+    }
+
+    // Terminates the drawers and the sampler manager, then the base renderer.
+    // NOTE(review): texCommandPool.Term() is called here and again by
+    // BaseMetalRenderer::Term(); confirm that Term() is idempotent or drop one call.
+    void Term() override
+    {
+        NOTICE_LOG(RENDERER, "MetalRenderer::Term");
+        WaitIdle();
+        texCommandPool.Term();
+        screenDrawer.Term();
+        textureDrawer.Term();
+        samplerManager.term();
+        BaseMetalRenderer::Term();
+    }
+
+    // Re-creates the screen drawer when the EmulateFramebuffer option has
+    // changed, ends the screen render pass before a render-to-texture context,
+    // then runs the common processing.
+    void Process(TA_context* ctx) override
+    {
+        if (emulateFramebuffer != config::EmulateFramebuffer)
+        {
+            screenDrawer.EndRenderPass();
+            WaitIdle();
+            screenDrawer.Term();
+            screenDrawer.Init(&samplerManager, &shaderManager, viewport);
+            // BaseInit(screenDrawer.GetRenderPass());
+            emulateFramebuffer = config::EmulateFramebuffer;
+        }
+        else if (ctx->rend.isRTT) {
+            screenDrawer.EndRenderPass();
+        }
+        BaseMetalRenderer::Process(ctx);
+    }
+
+    // Draws the current render context with the drawer matching its target.
+    // Returns true when the target was the screen, false for render-to-texture.
+    bool Render() override
+    {
+        MetalDrawer *drawer;
+        if (pvrrc.isRTT)
+            drawer = &textureDrawer;
+        else {
+            resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight);
+            drawer = &screenDrawer;
+        }
+
+        drawer->Draw(fogTexture.get(), paletteTexture.get());
+        if (config::EmulateFramebuffer || pvrrc.isRTT)
+            // delay ending the render pass in case of multi render
+            drawer->EndRenderPass();
+
+        return !pvrrc.isRTT;
+    }
+
+    // Presents either the emulated framebuffer texture or the screen drawer's
+    // last frame. Returns false when there is nothing to present.
+    bool Present() override
+    {
+        if (clearLastFrame)
+            return false;
+        if (config::EmulateFramebuffer || framebufferRendered)
+            return presentFramebuffer();
+        else
+            return screenDrawer.PresentFrame();
+    }
+
+protected:
+    // Re-initializes the screen drawer when the output size actually changes.
+    void resize(int w, int h) override
+    {
+        if ((u32)w == viewport.width && (u32)h == viewport.height)
+            return;
+        BaseMetalRenderer::resize(w, h);
+        // WaitIdle();
+        screenDrawer.Init(&samplerManager, &shaderManager, viewport);
+    }
+
+private:
+    MetalSamplers samplerManager;
+    MetalScreenDrawer screenDrawer;
+    MetalTextureDrawer textureDrawer;
+    // Value of config::EmulateFramebuffer the screen drawer was last initialized with
+    bool emulateFramebuffer = false;
+};
+
+// Factory for the Metal renderer. The caller owns the returned object.
+Renderer* rend_Metal()
+{
+    return new MetalRenderer();
+}
+
+// Re-initializes the OSD resources of the active renderer if it is a Metal renderer.
+void MetalReInitOSD()
+{
+    if (renderer != nullptr) {
+        BaseMetalRenderer *mtlrenderer = dynamic_cast(renderer);
+        if (mtlrenderer != nullptr)
+            mtlrenderer->ReInitOSD();
+    }
+}
\ No newline at end of file
diff --git a/core/rend/metal/metal_shaders.h b/core/rend/metal/metal_shaders.h
new file mode 100644
index 0000000000..663d2edd8f
--- /dev/null
+++ b/core/rend/metal/metal_shaders.h
@@ -0,0 +1,205 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see .
+*/
+
+#pragma once
+
+#include
+
+#include "types.h"
+#include
+#include
+
+// Compile-time options of the main vertex shader.
+struct MetalVertexShaderParams
+{
+    bool gouraud;
+    bool naomi2;
+    bool divPosZ;
+
+    // Packs the options into a unique cache key (one bit per option).
+    u32 hash() const { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)divPosZ << 2); }
+};
+
+// Compile-time options of the main fragment shader.
+struct MetalFragmentShaderParams
+{
+    bool alphaTest;
+    bool insideClipTest;
+    bool useAlpha;
+    bool texture;
+    bool ignoreTexAlpha;
+    int shaderInstr;
+    bool offset;
+    int fog;
+    bool gouraud;
+    bool bumpmap;
+    bool clamping;
+    bool trilinear;
+    int palette;
+    bool divPosZ;
+    bool dithering;
+
+    // Packs the options into a cache key. shaderInstr, fog and palette are each
+    // given two bits, so the key is only unique while they stay within 0..3.
+    u32 hash() const
+    {
+        return ((u32)alphaTest) | ((u32)insideClipTest << 1) | ((u32)useAlpha << 2)
+            | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | ((u32)shaderInstr << 5)
+            | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
+            | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13)
+            | ((u32)palette << 14) | ((u32)divPosZ << 16) | ((u32)dithering << 17);
+    }
+};
+
+// Compile-time options of the modifier volume vertex shader.
+struct MetalModVolShaderParams
+{
+    bool naomi2;
+    bool divPosZ;
+
+    // Packs the options into a unique cache key (one bit per option).
+    u32 hash() const { return (u32)naomi2 | ((u32)divPosZ << 1); }
+};
+
+// std140 alignment required
+struct MetalVertexShaderUniforms
+{
+    glm::mat4 ndcMat;
+};
+
+// std140 alignment required
+// NOTE(review): std140 is a GLSL/Vulkan layout rule; the layout that matters
+// here is the one of the matching struct in the Metal shader source - confirm
+// that sizes and offsets agree.
+struct MetalFragmentShaderUniforms
+{
+    float colorClampMin[4];
+    float colorClampMax[4];
+    float sp_FOG_COL_RAM[4]; // Only using 3 elements but easier for std140
+    float sp_FOG_COL_VERT[4]; // same comment
+    float ditherDivisor[4];
+    float cp_AlphaTestValue;
+    float sp_FOG_DENSITY;
+};
+
+// std140 alignment required
+struct MetalN2VertexShaderUniforms
+{
+    glm::mat4 mvMat;
+    glm::mat4 normalMat;
+    glm::mat4 projMat;
+    int envMapping[2];
+    int bumpMapping;
+    int polyNumber;
+
+    float glossCoef[2];
+    int constantColor[2];
+};
+
+// Compiles the Metal shader functions on demand and caches them.
+// The blit shaders are returned as is; all the others are compiled the first
+// time they are requested for a given set of options.
+class MetalShaders
+{
+public:
+    MetalShaders();
+
+    id GetBlitVertexShader() { return blitVertexShader; }
+    id GetBlitFragmentShader() { return blitFragmentShader; }
+
+    id GetModVolVertexShader(const MetalModVolShaderParams& params) { return getShader(modVolVertexShaders, params); }
+    // Returns the modifier volume fragment shader for the given divPosZ option,
+    // compiling and caching it on first use.
+    id GetModVolFragmentShader(bool divPosZ) {
+        auto modVolFragmentShader = modVolFragmentShaders.find(divPosZ);
+        if (modVolFragmentShader != modVolFragmentShaders.end())
+            return modVolFragmentShader->second;
+
+        auto shader = compileShader(divPosZ);
+        modVolFragmentShaders[divPosZ] = shader;
+        return shader;
+    }
+
+    // Returns the quad vertex shader, rotated or not, compiling it on first use.
+    id GetQuadVertexShader(bool rotate) {
+        if (rotate)
+        {
+            if (quadRotateVertexShader == nil)
+                quadRotateVertexShader = compileQuadVertexShader(true);
+            return quadRotateVertexShader;
+        }
+        else
+        {
+            if (quadVertexShader == nil)
+                quadVertexShader = compileQuadVertexShader(false);
+            return quadVertexShader;
+        }
+    }
+
+    // Returns the quad fragment shader, with or without texture alpha,
+    // compiling it on first use.
+    id GetQuadFragmentShader(bool ignoreTexAlpha) {
+        if (ignoreTexAlpha)
+        {
+            if (quadNoAlphaFragmentShader == nil)
+                quadNoAlphaFragmentShader = compileQuadFragmentShader(true);
+            return quadNoAlphaFragmentShader;
+        }
+        else
+        {
+            if (quadFragmentShader == nil)
+                quadFragmentShader = compileQuadFragmentShader(false);
+            return quadFragmentShader;
+        }
+    }
+
+    id GetVertexShader(const MetalVertexShaderParams& params) { return getShader(vertexShaders, params); }
+    id GetFragmentShader(const MetalFragmentShaderParams& params) { return getShader(fragmentShaders, params); }
+
+    // Drops every lazily compiled shader so that it is compiled again on the
+    // next request. The blit shaders have no lazy path in this class and are
+    // therefore left untouched.
+    void term()
+    {
+        vertexShaders.clear();
+        fragmentShaders.clear();
+        modVolVertexShaders.clear();
+        modVolFragmentShaders.clear();
+        quadVertexShader = nil;
+        quadRotateVertexShader = nil;
+        quadFragmentShader = nil;
+        quadNoAlphaFragmentShader = nil;
+    }
+
+private:
+    id blitShaderLibrary;
+    id modVolShaderLibrary;
+    id n2ModVolVertexShaderLibrary;
+    id vertexShaderLibrary;
+    id n2VertexShaderLibrary;
+    id fragmentShaderLibrary;
+    id quadShaderLibrary;
+    MTLFunctionConstantValues* vertexShaderConstants;
+    MTLFunctionConstantValues* fragmentShaderConstants;
+    MTLFunctionConstantValues* modVolShaderConstants;
+    MTLFunctionConstantValues* quadShaderConstants;
+
+    // Returns the shader cached in map under params.hash(), compiling and
+    // inserting it first if it isn't there yet.
+    template
+    id getShader(std::map> &map, T params)
+    {
+        u32 h = params.hash();
+        auto it = map.find(h);
+        if (it != map.end())
+            return it->second;
+        auto shader = compileShader(params);
+        map[h] = shader;
+        return shader;
+    }
+    id compileShader(const MetalVertexShaderParams& params);
+    id compileShader(const MetalFragmentShaderParams& params);
+    id compileShader(const MetalModVolShaderParams& params);
+    id compileShader(bool divPosZ);
+    id compileQuadVertexShader(bool rotate);
+    id compileQuadFragmentShader(bool ignoreTexAlpha);
+
+    id blitVertexShader;
+    id blitFragmentShader;
+
+    std::map> modVolVertexShaders;
+    std::map> modVolFragmentShaders;
+
+    std::map> vertexShaders;
+    std::map> fragmentShaders;
+
+    // Compiled on first use: nil means "not compiled yet", so start out as nil
+    id quadVertexShader = nil;
+    id quadRotateVertexShader = nil;
+    id quadFragmentShader = nil;
+    id quadNoAlphaFragmentShader = nil;
+};
diff --git a/core/rend/metal/metal_shaders.mm b/core/rend/metal/metal_shaders.mm
new file mode 100644
index 0000000000..0c3f6d9c91
--- /dev/null
+++ b/core/rend/metal/metal_shaders.mm
@@ -0,0 +1,1053 @@
+/*
+    Copyright 2025 flyinghead
+
+    This file is part of Flycast.
+
+    Flycast is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    Flycast is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Flycast. If not, see .
+*/ + +#include "metal_shaders.h" + +#include "metal_context.h" + +static const char VertexShaderSource[] = R"( +#include +#include + +using namespace metal; + +constant bool pp_gouraud [[function_constant(0)]]; +constant bool div_pos_z [[function_constant(1)]]; + +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flat = !is_flat; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct VertexIn +{ + float4 in_pos [[attribute(0)]]; + float4 in_base [[attribute(1)]]; + float4 in_offs [[attribute(2)]]; + float2 in_uv [[attribute(3)]]; +}; + +struct VertexOut +{ + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; + float3 vtx_uv; + float4 position [[position]]; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms& uniforms [[buffer(0)]]) +{ + float4 vpos = uniforms.ndc_mat * in.in_pos; + + if (div_pos_z) { + vpos /= vpos.z; + vpos.z = vpos.w; + } + + VertexOut out = {}; + if (is_flat) { + out.flat_vtx_base = in.in_base; + out.flat_vtx_offs = in.in_offs; + } else { + out.vtx_base = in.in_base; + out.vtx_offs = in.in_offs; + } + out.vtx_uv = float3(in.in_uv, vpos.z); + + if (pp_gouraud && !div_pos_z) { + if (is_flat) { + out.flat_vtx_base *= vpos.z; + out.flat_vtx_offs *= vpos.z; + } else { + out.vtx_base *= vpos.z; + out.vtx_offs *= vpos.z; + } + } + + if (!div_pos_z) { + out.vtx_uv.xy *= vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + } + + out.position = vpos; + + return out; +} +)"; + +static const char FragmentShaderSource[] = R"( +#include +#define PI 3.1415926 + +using namespace metal; + +constant bool cp_alpha_test [[function_constant(0)]]; +constant bool pp_clip_inside [[function_constant(1)]]; +constant bool pp_use_alpha [[function_constant(2)]]; +constant bool pp_texture [[function_constant(3)]]; +constant bool pp_ignore_tex_a 
[[function_constant(4)]]; +constant int pp_shad_instr [[function_constant(5)]]; +constant bool pp_offset [[function_constant(6)]]; +constant int pp_fog_ctrl [[function_constant(7)]]; +constant bool pp_gouraud [[function_constant(8)]]; +constant bool pp_bump_map [[function_constant(9)]]; +constant bool color_clamping [[function_constant(10)]]; +constant bool pp_trilinear [[function_constant(11)]]; +constant int pp_palette [[function_constant(12)]]; +constant bool div_pos_z [[function_constant(13)]]; +constant bool dithering [[function_constant(14)]]; + +constant bool has_fog_table = pp_fog_ctrl != 2; +constant bool has_palette = pp_palette != 0; +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flat = !is_flat; + +struct FragmentShaderUniforms +{ + float4 color_clamp_min; + float4 color_clamp_max; + float4 sp_fog_col_ram; + float4 sp_fog_col_vert; + float4 dither_divisor; + float cp_alpha_test_value; + float sp_fog_density; +}; + +struct PushBlock +{ + float4 clip_test; + float trilinear_alpha; + float palette_index; +}; + +struct VertexOut +{ + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; + float3 vtx_uv; + float4 position [[position]]; +}; + +struct FragmentOut +{ + float4 color [[color(0)]]; + float depth [[depth(any)]]; +}; + +float fog_mode2(float w, constant FragmentShaderUniforms& uniforms, + texture2d fog_table, sampler fog_table_sampler) +{ + float z = 0.0; + + if (div_pos_z) { + z = clamp(uniforms.sp_fog_density / w, 1.0, 255.9999); + } else { + z = clamp(uniforms.sp_fog_density * w, 1.0, 255.9999); + } + + float exp = floor(log2(z)); + float m = z * 16.0 / powr(2.0, exp) - 16.0; + float idx = floor(m) + exp * 16.0 + 0.5; + float4 fog_coef = fog_table.sample(fog_table_sampler, float2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); + return fog_coef.r; +} + +float4 
color_clamp(float4 col, constant FragmentShaderUniforms& uniforms) +{ + if (color_clamping) + { + return clamp(col, uniforms.color_clamp_min, uniforms.color_clamp_max); + } else { + return col; + } +} + +float4 get_palette_entry(texture2d palette, sampler palette_sampler, + float col_idx, constant PushBlock& push_constants) +{ + float2 c = float2(col_idx * 255.0 / 1023.0 + push_constants.palette_index, 0.5); + return palette.sample(palette_sampler, c); +} + +float4 palette_pixel(texture2d texture, sampler texture_sampler, + texture2d palette, sampler palette_sampler, + float3 coords, constant PushBlock& push_constants) +{ + if (div_pos_z) { + return get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, coords.xy).r, push_constants); + } else { + return get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, float2(coords.xy / coords.z)).r, push_constants); + } +} + +float4 palette_pixel_bilinear(texture2d texture, sampler texture_sampler, + texture2d palette, sampler palette_sampler, + float3 coords, constant PushBlock& push_constants) +{ + if (!div_pos_z) { + coords.xy /= coords.z; + } + + float2 tex_size = float2(texture.get_width(), texture.get_height()); + float2 pix_coord = coords.xy * tex_size - 0.5; // Coordinates of top left pixel + float2 origin_pix_coords = floor(pix_coord); + + float2 sample_uv = (origin_pix_coords + 0.5) / tex_size; // UV coordinates of center of top left pixel + + // Sample from all surrounding texels + float4 c00 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv).r, push_constants); + float4 c01 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(0, 1)).r, push_constants); + float4 c11 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(1, 1)).r, push_constants); + float4 c10 = get_palette_entry(palette, palette_sampler, texture.sample(texture_sampler, sample_uv, int2(1, 
0)).r, push_constants); + + float2 weight = pix_coord - origin_pix_coords; + + // Bi-linear mixing + float4 temp0 = mix(c00, c10, weight.x); + float4 temp1 = mix(c01, c11, weight.x); + return mix(temp0, temp1, weight.y); +} + +fragment FragmentOut fs_main(VertexOut in [[stage_in]], constant FragmentShaderUniforms& uniforms [[buffer(0)]], + constant PushBlock& push_constants [[buffer(1)]], + texture2d tex [[texture(0), function_constant(pp_texture)]], sampler tex_sampler [[sampler(0), function_constant(pp_texture)]], + texture2d fog_table [[texture(2), function_constant(has_fog_table)]], sampler fog_table_sampler [[sampler(2), function_constant(has_fog_table)]], + texture2d palette [[texture(3), function_constant(has_palette)]], sampler palette_sampler [[sampler(3), function_constant(has_palette)]]) +{ + // Clip inside the box + if (pp_clip_inside) { + if (in.position.x >= push_constants.clip_test.x && in.position.x <= push_constants.clip_test.z + && in.position.y >= push_constants.clip_test.y && in.position.y <= push_constants.clip_test.w) + discard_fragment(); + } + + float4 color; + float4 offset; + + if (is_flat) { + color = in.flat_vtx_base; + offset = in.flat_vtx_offs; + } else { + color = in.vtx_base; + offset = in.vtx_offs; + } + + if (pp_gouraud && !div_pos_z) { + color /= in.vtx_uv.z; + offset /= in.vtx_uv.z; + } + + if (!pp_use_alpha) { + color.a = 1.0; + } + + if (pp_fog_ctrl == 3) { + color = float4(uniforms.sp_fog_col_ram.rgb, fog_mode2(in.vtx_uv.z, uniforms, fog_table, fog_table_sampler)); + } + + if (pp_texture) { + float4 tex_col; + + if (pp_palette == 0) { + if (div_pos_z) { + tex_col = tex.sample(tex_sampler, in.vtx_uv.xy); + } else { + tex_col = tex.sample(tex_sampler, float2(in.vtx_uv.xy / in.vtx_uv.z)); + } + } else { + if (pp_palette == 1) { + tex_col = palette_pixel(tex, tex_sampler, palette, palette_sampler, in.vtx_uv, push_constants); + } else { + tex_col = palette_pixel_bilinear(tex, tex_sampler, palette, palette_sampler, in.vtx_uv, 
push_constants); + } + } + + if (pp_bump_map) { + float s = PI / 2.0 * (tex_col.a * 15.0 * 16.0 + tex_col.r * 15.0) / 255.0; + float r = 2.0 * PI * (tex_col.g * 15.0 * 16.0 + tex_col.b * 15.0) / 255.0; + tex_col.a = clamp(offset.a + offset.r * sin(s) + offset.g * cos(s) * cos(r - 2.0 * PI * offset.b), 0.0, 1.0); + tex_col.rgb = float3(1.0, 1.0, 1.0); + } else { + if (pp_ignore_tex_a) + tex_col.a = 1.0; + } + + if (pp_shad_instr == 0) { + color = tex_col; + } else if (pp_shad_instr == 1) { + color.rgb *= tex_col.rgb; + color.a = tex_col.a; + } else if (pp_shad_instr == 2) { + color.rgb = mix(color.rgb, tex_col.rgb, tex_col.a); + } else if (pp_shad_instr == 3) { + color *= tex_col; + } + + if (pp_offset && !pp_bump_map) { + color.rgb += offset.rgb; + } + } + + color = color_clamp(color, uniforms); + + if (pp_fog_ctrl == 0) { + color.rgb = mix(color.rgb, uniforms.sp_fog_col_ram.rgb, fog_mode2(in.vtx_uv.z, uniforms, fog_table, fog_table_sampler)); + } + + if (pp_fog_ctrl == 1 && pp_offset && !pp_bump_map) { + color.rgb = mix(color.rgb, uniforms.sp_fog_col_vert.rgb, offset.a); + } + + if (pp_trilinear) + color *= push_constants.trilinear_alpha; + + if (cp_alpha_test) { + color.a = round(color.a * 255.0) / 255.0; + if (uniforms.cp_alpha_test_value > color.a) + discard_fragment(); + color.a = 1.0; + } + + float w; + + if (div_pos_z) { + w = 100000.0 / in.vtx_uv.z; + } else { + w = 100000.0 * in.vtx_uv.z; + } + + float depth = log2(1.0 + max(w, -0.999999)) / 34.0; + + if (dithering) { + constexpr float dither_table[16] = { + 5, 13, 7, 15, + 9, 1, 11, 3, + 6, 14, 4, 12, + 10, 2, 8, 0 + }; + + float r = dither_table[int(fmod(in.position.y, 4.0)) * 4 + int(fmod(in.position.x, 4.0))]; + float4 dv = float4(r, r, r, 1.0) / uniforms.dither_divisor; + color = clamp(floor(color * 255 + dv) / 255, 0, 1); + } + + return FragmentOut { color, depth }; +} +)"; + +static const char BlitShader[] = R"( +#include +using namespace metal; + +struct VertexOut { + float4 position [[position]]; 
+ float2 texCoord; +}; + +vertex VertexOut vs_main(uint vertexID [[vertex_id]]) { + // Predefined positions and texture coordinates for a full-screen quad + float4 positions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), // Bottom-left + float4( 1.0, -1.0, 0.0, 1.0), // Bottom-right + float4(-1.0, 1.0, 0.0, 1.0), // Top-left + float4( 1.0, 1.0, 0.0, 1.0) // Top-right + }; + + float2 texCoords[4] = { + float2(0.0, 1.0), // Bottom-left + float2(1.0, 1.0), // Bottom-right + float2(0.0, 0.0), // Top-left + float2(1.0, 0.0) // Top-right + }; + + VertexOut out; + out.position = positions[vertexID]; + out.texCoord = texCoords[vertexID]; + return out; +} + +fragment float4 fs_main(VertexOut in [[stage_in]], + texture2d sourceTexture [[texture(0)]]) { + constexpr sampler textureSampler(mag_filter::linear, min_filter::linear); + return sourceTexture.sample(textureSampler, in.texCoord); +} +)"; + +static const char ModVolShaderSource[] = R"( +#include + +using namespace metal; + +constant bool div_pos_z [[function_constant(0)]]; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct VertexIn +{ + float4 in_pos [[attribute(0)]]; +}; + +struct VertexOut +{ + float depth; + float4 position [[position]]; +}; + +struct FragmentOut +{ + float4 color [[color(0)]]; + float depth [[depth(any)]]; +}; + +struct PushBlock +{ + float sp_shader_color; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]], constant VertexShaderUniforms& uniforms [[buffer(0)]]) { + float4 vpos = uniforms.ndc_mat * in.in_pos; + + VertexOut out = {}; + + if (div_pos_z) { + vpos /= vpos.z; + vpos.z = vpos.w; + out.depth = vpos.w; + } else { + out.depth = vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + } + + out.position = vpos; + return out; +} + +fragment FragmentOut fs_main(VertexOut in [[stage_in]], + constant PushBlock& push_constants [[buffer(1)]]) { + FragmentOut out = {}; + + float w; + + if (div_pos_z) { + w = 100000.0 / in.depth; + } else { + w = 100000.0 * in.depth; + } + + out.depth = log2(1.0 + 
max(w, -0.999999)) / 34.0; + out.color = float4(0.0, 0.0, 0.0, push_constants.sp_shader_color); + return out; +} +)"; + +static const char QuadShaderSource[] = R"( +#include + +using namespace metal; + +constant bool rotate_quad [[function_constant(0)]]; +constant bool ignore_tex_alpha [[function_constant(1)]]; + +struct VertexIn +{ + float3 pos [[attribute(0)]]; + float2 uv [[attribute(1)]]; +}; + +struct VertexOut +{ + float4 position [[position]]; + float2 uv; +}; + +struct PushBlock +{ + float4 color; +}; + +vertex VertexOut vs_main(VertexIn in [[stage_in]]) { + VertexOut out = {}; + + if (!rotate_quad) { + out.position = float4(in.pos, 1.0); + } else { + out.position = float4(-in.pos.y, in.pos.x, in.pos.z, 1.0); + } + + out.uv = in.uv; + return out; +} + +fragment float4 fs_main(VertexOut in [[stage_in]], + constant PushBlock& push_constants [[buffer(0)]], + texture2d tex [[texture(0)]], + sampler samp [[sampler(0)]]) { + float4 color; + + if (ignore_tex_alpha) { + color.rgb = push_constants.color.rgb * tex.sample(samp, in.uv).rgb; + color.a = push_constants.color.a; + } else { + color = push_constants.color * tex.sample(samp, in.uv); + } + + return color; +} +)"; + +static const char N2VertexShaderSource[] = R"( +#include + +using namespace metal; + +constant bool pp_gouraud [[function_constant(0)]]; + +constant bool is_flat = pp_gouraud == 0; +constant bool is_not_flat = !is_flat; + +struct VertexShaderUniforms +{ + float4x4 ndc_mat; +}; + +struct N2VertexShaderUniforms +{ + float4x4 mv_mat; + float4x4 normal_mat; + float4x4 proj_mat; + int2 env_mapping; + int bump_mapping; + int poly_number; + + float2 gloss_coef; + int2 constant_color; +}; + +struct N2VertexIn +{ + float4 in_pos [[attribute(0)]]; + float4 in_base [[attribute(1)]]; + float4 in_offs [[attribute(2)]]; + float2 in_uv [[attribute(3)]]; + float3 in_normal [[attribute(7)]]; +}; + +struct VertexOut +{ + float4 flat_vtx_base [[flat, function_constant(is_flat)]]; + float4 flat_vtx_offs [[flat, 
function_constant(is_flat)]]; + float4 vtx_base [[function_constant(is_not_flat)]]; + float4 vtx_offs [[function_constant(is_not_flat)]]; + float3 vtx_uv; + float4 position [[position]]; +}; + +#define PI 3.1415926 + +#define LMODE_SINGLE_SIDED 0 +#define LMODE_DOUBLE_SIDED 1 +#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2 +#define LMODE_SPECIAL_EFFECT 3 +#define LMODE_THIN_SURFACE 4 +#define LMODE_BUMP_MAP 5 + +#define ROUTING_SPEC_TO_OFFSET 1 +#define ROUTING_DIFF_TO_OFFSET 2 +#define ROUTING_ATTENUATION 1 // not handled +#define ROUTING_FOG 2 // not handled +#define ROUTING_ALPHA 4 +#define ROUTING_SUB 8 + +struct N2Light +{ + float4 color; + float4 direction; // For parallel/spot + float4 position; // For spot/point + + int parallel; + int routing; + int dmode; + int smode; + + int2 diffuse; + int2 specular; + + float attn_dist_a; + float attn_dist_b; + float attn_angle_a; // For spot + float attn_angle_b; + + int dist_attn_mode; // For spot/point + int _pad1; + int _pad2; + int _pad3; +}; + +struct N2Lights +{ + N2Light lights[16]; + float4 ambient_base[2]; + float4 ambient_offset[2]; + int2 ambient_material_base; + int2 ambient_material_offset; + int light_count; + int use_base_over; + int bump_id0; + int bump_id1; +}; + +void w_divide(thread float4& vpos, float4x4 ndc_mat, thread VertexOut& out) +{ + vpos = float4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); + vpos = ndc_mat * vpos; + if (pp_gouraud == 1) { + if (is_flat) { + out.flat_vtx_base *= vpos.z; + out.flat_vtx_offs *= vpos.z; + } else { + out.vtx_base *= vpos.z; + out.vtx_offs *= vpos.z; + } + } + out.vtx_uv = float3(out.vtx_uv.xy * vpos.z, vpos.z); + vpos.w = 1.0; + vpos.z = 0.0; +} + +void compute_colors(constant N2VertexShaderUniforms& n2_uniforms, constant N2Lights& n2_lights, + thread float4& base_col, thread float4& offset_col, int vol_idx, float3 position, float3 normal) +{ + if (n2_uniforms.constant_color[vol_idx] == 1) + return; + + float3 diffuse = float3(0.0); + float3 specular = float3(0.0); + 
float diffuse_alpha = 0.0; + float specular_alpha = 0.0; + float3 reflect_dir = reflect(normalize(position), normal); + const float BASE_FACTOR = 2.0; + + for (int i = 0; i < n2_lights.light_count; i++) + { + float3 light_dir; // direction to the light + float3 light_color = n2_lights.lights[i].color.rgb; + if (n2_lights.lights[i].parallel == 1) + { + light_dir = normalize(n2_lights.lights[i].direction.xyz); + } + else + { + light_dir = normalize(n2_lights.lights[i].position.xyz - position); + if (n2_lights.lights[i].attn_dist_a != 1.0 || n2_lights.lights[i].attn_dist_b != 0.0) + { + float distance = length(n2_lights.lights[i].position.xyz - position); + if (n2_lights.lights[i].dist_attn_mode == 0) + distance = 1.0 / distance; + light_color *= clamp(n2_lights.lights[i].attn_dist_b * distance + n2_lights.lights[i].attn_dist_a, 0.0, 1.0); + } + if (n2_lights.lights[i].attn_angle_a != 1.0 || n2_lights.lights[i].attn_angle_b != 0.0) + { + float3 spot_dir = n2_lights.lights[i].direction.xyz; + float cos_angle = 1.0 - max(0.0, dot(light_dir, spot_dir)); + light_color *= clamp(cos_angle * n2_lights.lights[i].attn_angle_b + n2_lights.lights[i].attn_angle_a, 0.0, 1.0); + } + } + if (n2_lights.lights[i].diffuse[vol_idx] == 1) + { + float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? -BASE_FACTOR : BASE_FACTOR; + if (n2_lights.lights[i].dmode == LMODE_SINGLE_SIDED) + factor *= max(dot(normal, light_dir), 0.0); + else if (n2_lights.lights[i].dmode == LMODE_DOUBLE_SIDED) + factor *= abs(dot(normal, light_dir)); + + if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) + diffuse_alpha += light_color.r * factor; + else + { + if ((n2_lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += light_color * factor * base_col.rgb; + else + specular += light_color * factor * base_col.rgb; + } + } + if (n2_lights.lights[i].specular[vol_idx] == 1) + { + float factor = (n2_lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; + if (n2_lights.lights[i].smode == LMODE_SINGLE_SIDED) + factor *= clamp(powr(max(dot(light_dir, reflect_dir), 0.0), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + else if (n2_lights.lights[i].smode == LMODE_DOUBLE_SIDED) + factor *= clamp(powr(abs(dot(light_dir, reflect_dir)), n2_uniforms.gloss_coef[vol_idx]), 0.0, 1.0); + + if ((n2_lights.lights[i].routing & ROUTING_ALPHA) != 0) + specular_alpha += light_color.r * factor; + else + { + if ((n2_lights.lights[i].routing & ROUTING_SPEC_TO_OFFSET) == 0) + diffuse += light_color * factor * offset_col.rgb; + else + specular += light_color * factor * offset_col.rgb; + } + } + } + // ambient light + if (n2_lights.ambient_material_base[vol_idx] == 1) + diffuse += n2_lights.ambient_base[vol_idx].rgb * base_col.rgb; + else + diffuse += n2_lights.ambient_base[vol_idx].rgb; + if (n2_lights.ambient_material_offset[vol_idx] == 1) + specular += n2_lights.ambient_offset[vol_idx].rgb * offset_col.rgb; + else + specular += n2_lights.ambient_offset[vol_idx].rgb; + base_col.rgb = diffuse; + offset_col.rgb = specular; + + base_col.a += diffuse_alpha; + offset_col.a += specular_alpha; + if (n2_lights.use_base_over == 1) + { + float4 overflow = max(base_col - float4(1.0), 0.0); + offset_col += overflow; + } + base_col = clamp(base_col, 0.0, 1.0); + offset_col = clamp(offset_col, 0.0, 1.0); +} + +void compute_env_map(thread float3& uv, float3 position, float3 normal) +{ + // Spherical mapping + // float3 r = reflect(normalize(position), normal); + // float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0)); + // uv.xy += r.xy / m + 0.5; + + // Cheap env mapping + uv.xy += (normal.xy / 2.0) + 0.5; + uv.xy = clamp(uv.xy, 0.0, 1.0); +} + +vertex VertexOut vs_main(N2VertexIn in [[stage_in]], + constant VertexShaderUniforms& uniforms [[buffer(0)]], + constant N2VertexShaderUniforms& n2_uniforms [[buffer(1)]], + constant N2Lights& n2_lights [[buffer(2)]]) +{ + float4 vpos = n2_uniforms.mv_mat * 
// Compiles Metal Shading Language source text into a library.
// On failure the compiler diagnostic is logged and a debug assertion fires;
// when assertions are compiled out, nil is returned to the caller.
static id<MTLLibrary> compileLibrary(id<MTLDevice> device, const char *source, MTLCompileOptions *options)
{
    NSError *error = nil;
    id<MTLLibrary> library = [device newLibraryWithSource:[NSString stringWithUTF8String:source]
                                                  options:options
                                                    error:&error];
    if (!library) {
        // error can still be nil (e.g. nil device): never hand a NULL pointer to "%s".
        const char *reason = error != nil ? [[error localizedDescription] UTF8String] : "unknown error";
        ERROR_LOG(RENDERER, "%s", reason);
        assert(false);
    }
    return library;
}

// Creates the entry point `name` from `library`, specialized with `constants`.
// Same failure policy as compileLibrary(): log, assert, return nil.
static id<MTLFunction> specializeFunction(id<MTLLibrary> library, NSString *name, MTLFunctionConstantValues *constants)
{
    NSError *error = nil;
    id<MTLFunction> function = [library newFunctionWithName:name constantValues:constants error:&error];
    if (!function) {
        // A nil library yields a nil function without filling in error.
        const char *reason = error != nil ? [[error localizedDescription] UTF8String] : "unknown error";
        ERROR_LOG(RENDERER, "%s", reason);
        assert(false);
    }
    return function;
}

// Compiles every shader library up front (fast math enabled) and creates the
// function-constant containers that the compile* methods fill in later.
// The blit functions take no function constants, so they are created here once.
MetalShaders::MetalShaders() {
    auto device = MetalContext::Instance()->GetDevice();

    MTLCompileOptions *compileOptions = [[MTLCompileOptions alloc] init];
    [compileOptions setFastMathEnabled:YES];

    fragmentShaderLibrary = compileLibrary(device, FragmentShaderSource, compileOptions);
    fragmentShaderConstants = [[MTLFunctionConstantValues alloc] init];

    vertexShaderLibrary = compileLibrary(device, VertexShaderSource, compileOptions);
    vertexShaderConstants = [[MTLFunctionConstantValues alloc] init];

    n2VertexShaderLibrary = compileLibrary(device, N2VertexShaderSource, compileOptions);

    blitShaderLibrary = compileLibrary(device, BlitShader, compileOptions);
    blitVertexShader = specializeFunction(blitShaderLibrary, @"vs_main", [[MTLFunctionConstantValues alloc] init]);
    blitFragmentShader = specializeFunction(blitShaderLibrary, @"fs_main", [[MTLFunctionConstantValues alloc] init]);

    modVolShaderLibrary = compileLibrary(device, ModVolShaderSource, compileOptions);
    modVolShaderConstants = [[MTLFunctionConstantValues alloc] init];

    n2ModVolVertexShaderLibrary = compileLibrary(device, MTLN2ModVolVertexShaderSource, compileOptions);

    quadShaderLibrary = compileLibrary(device, QuadShaderSource, compileOptions);
    quadShaderConstants = [[MTLFunctionConstantValues alloc] init];
}

// Vertex function for regular polygons.
// Constant 0 = gouraud for both variants; constant 1 = divPosZ is only set for
// the non-Naomi2 library.
id<MTLFunction> MetalShaders::compileShader(const MetalVertexShaderParams &params) {
    [vertexShaderConstants setConstantValue:&params.gouraud type:MTLDataTypeBool atIndex:0];

    if (params.naomi2)
        return specializeFunction(n2VertexShaderLibrary, @"vs_main", vertexShaderConstants);

    [vertexShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:1];
    return specializeFunction(vertexShaderLibrary, @"vs_main", vertexShaderConstants);
}

// Fragment function for regular polygons. The constant indices below must stay
// in sync with the function_constant indices declared in FragmentShaderSource.
id<MTLFunction> MetalShaders::compileShader(const MetalFragmentShaderParams &params) {
    MTLFunctionConstantValues *constants = fragmentShaderConstants;

    [constants setConstantValue:&params.alphaTest type:MTLDataTypeBool atIndex:0];
    [constants setConstantValue:&params.insideClipTest type:MTLDataTypeBool atIndex:1];
    [constants setConstantValue:&params.useAlpha type:MTLDataTypeBool atIndex:2];
    [constants setConstantValue:&params.texture type:MTLDataTypeBool atIndex:3];
    [constants setConstantValue:&params.ignoreTexAlpha type:MTLDataTypeBool atIndex:4];
    [constants setConstantValue:&params.shaderInstr type:MTLDataTypeInt atIndex:5];
    [constants setConstantValue:&params.offset type:MTLDataTypeBool atIndex:6];
    [constants setConstantValue:&params.fog type:MTLDataTypeInt atIndex:7];
    [constants setConstantValue:&params.gouraud type:MTLDataTypeBool atIndex:8];
    [constants setConstantValue:&params.bumpmap type:MTLDataTypeBool atIndex:9];
    [constants setConstantValue:&params.clamping type:MTLDataTypeBool atIndex:10];
    [constants setConstantValue:&params.trilinear type:MTLDataTypeBool atIndex:11];
    [constants setConstantValue:&params.palette type:MTLDataTypeInt atIndex:12];
    [constants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:13];
    [constants setConstantValue:&params.dithering type:MTLDataTypeBool atIndex:14];

    return specializeFunction(fragmentShaderLibrary, @"fs_main", constants);
}

// Vertex function for modifier volumes.
// The Naomi2 variant is specialized with an empty constant set; the regular
// variant is specialized with constant 0 = divPosZ.
id<MTLFunction> MetalShaders::compileShader(const MetalModVolShaderParams &params) {
    [modVolShaderConstants setConstantValue:&params.divPosZ type:MTLDataTypeBool atIndex:0];

    if (params.naomi2)
        return specializeFunction(n2ModVolVertexShaderLibrary, @"vs_main", [[MTLFunctionConstantValues alloc] init]);

    return specializeFunction(modVolShaderLibrary, @"vs_main", modVolShaderConstants);
}

// Fragment function for modifier volumes (constant 0 = divPosZ).
id<MTLFunction> MetalShaders::compileShader(bool divPosZ) {
    [modVolShaderConstants setConstantValue:&divPosZ type:MTLDataTypeBool atIndex:0];

    return specializeFunction(modVolShaderLibrary, @"fs_main", modVolShaderConstants);
}

// Vertex function of the quad shader (constant 0 = rotate_quad).
id<MTLFunction> MetalShaders::compileQuadVertexShader(bool rotate)
{
    [quadShaderConstants setConstantValue:&rotate type:MTLDataTypeBool atIndex:0];

    return specializeFunction(quadShaderLibrary, @"vs_main", quadShaderConstants);
}

// Fragment function of the quad shader (constant 1 = ignore_tex_alpha).
id<MTLFunction> MetalShaders::compileQuadFragmentShader(bool ignoreTexAlpha)
{
    [quadShaderConstants setConstantValue:&ignoreTexAlpha type:MTLDataTypeBool atIndex:1];

    return specializeFunction(quadShaderLibrary, @"fs_main", quadShaderConstants);
}
// A texture cache entry backed by an MTLTexture.
class MetalTexture final : public BaseTextureCacheData
{
public:
    MetalTexture(TSP tsp = {}, TCW tcw = {}) : BaseTextureCacheData(tsp, tcw) {}

    std::string GetId() override { return std::to_string([texture gpuResourceID]._impl); }
    id<MTLTexture> GetTexture() const { return texture; }
    void UploadToGPU(int width, int height, const u8 *data, bool mipmapped, bool mipmapsIncluded = false) override;
    // Command buffer used by GenerateMipmaps() to encode its blit pass.
    void SetCommandBuffer(id<MTLCommandBuffer> commandBuffer) { this->commandBuffer = commandBuffer; }
    void SetTexture(id<MTLTexture> texture, u32 width, u32 height) {
        this->texture = texture;
        this->width = width;
        this->height = height;
    }
    void SetInFlight(bool inFlight) {
        this->isInFlight = inFlight;
    }
    // Moves the GPU resources into a deleter object handed to the flight manager.
    void deferDeleteResource(MetalFlightManager *manager);
    // Returns the private copy made by CreateReadOnlyCopy() when there is one,
    // otherwise the texture itself.
    id<MTLTexture> GetReadOnlyTexture() const { return readOnlyTexture ? readOnlyTexture : texture; }
    void CreateReadOnlyCopy(id<MTLCommandBuffer> commandBuffer);

private:
    void Init(u32 width, u32 height, MTLPixelFormat format, u32 dataSize, bool mipmapped, bool mipmapsIncluded);
    void SetImage(u32 srcSize, const void *srcData, bool genMipmaps);
    void GenerateMipmaps();

    MTLPixelFormat format = MTLPixelFormatInvalid;
    u32 width = 0;
    u32 height = 0;
    u32 mipmapLevels = 1;
    id<MTLCommandBuffer> commandBuffer = nil;
    id<MTLTexture> texture = nil;
    id<MTLTexture> readOnlyTexture = nil;
    bool isInFlight = false;

    friend class MetalTextureCache;
};

// Lazily creates MTLSamplerState objects and caches them by the TSP bits that
// affect sampling, plus the punch-through flag.
class MetalSamplers
{
public:
    explicit MetalSamplers();
    ~MetalSamplers();

    // MipMapD, FilterMode, ClampU, ClampV, FlipU, FlipV
    static const u32 TSP_Mask = 0x7ef00;

    void term() {
        samplers.clear();
    }

    // Picks the TSP of the requested texture unit, applies the filtering
    // overrides (palette textures and TextureFiltering == 1 force FilterMode 0,
    // TextureFiltering == 2 forces FilterMode 1) and returns the matching sampler.
    id<MTLSamplerState> GetSampler(const PolyParam& poly, bool punchThrough, bool texture1 = false) {
        TSP tsp = texture1 ? poly.tsp1 : poly.tsp;
        if (poly.texture != nullptr && poly.texture->gpuPalette)
            tsp.FilterMode = 0;
        else if (config::TextureFiltering == 1)
            tsp.FilterMode = 0;
        else if (config::TextureFiltering == 2)
            tsp.FilterMode = 1;
        return GetSampler(tsp, punchThrough);
    }

    // Returns the cached sampler for these TSP bits, creating it on first use.
    id<MTLSamplerState> GetSampler(TSP tsp, bool punchThrough = false) {
        // Bit 0 is outside TSP_Mask, so it is free to carry the punch-through flag.
        const u32 hash = (tsp.full & TSP_Mask) | punchThrough;

        // Look up with find(): operator[] would insert a nil entry and make a
        // later emplace() for the same key a no-op, so nothing would be cached.
        const auto cached = samplers.find(hash);
        if (cached != samplers.end() && cached->second != nil)
            return cached->second;

        MTLSamplerDescriptor *desc = [[MTLSamplerDescriptor alloc] init];

        if (tsp.FilterMode != 0) {
            [desc setMinFilter:MTLSamplerMinMagFilterLinear];
            [desc setMagFilter:MTLSamplerMinMagFilterLinear];
            // Punch-through polygons do not blend between mip levels.
            [desc setMipFilter:punchThrough ? MTLSamplerMipFilterNearest : MTLSamplerMipFilterLinear];
        }
        else {
            [desc setMinFilter:MTLSamplerMinMagFilterNearest];
            [desc setMagFilter:MTLSamplerMinMagFilterNearest];
            [desc setMipFilter:MTLSamplerMipFilterNearest];
        }

        // Clamp takes precedence over flip (mirror); plain repeat otherwise.
        const auto sRepeat = tsp.ClampU ? MTLSamplerAddressModeClampToEdge
                : tsp.FlipU ? MTLSamplerAddressModeMirrorRepeat : MTLSamplerAddressModeRepeat;
        const auto tRepeat = tsp.ClampV ? MTLSamplerAddressModeClampToEdge
                : tsp.FlipV ? MTLSamplerAddressModeMirrorRepeat : MTLSamplerAddressModeRepeat;

        [desc setSAddressMode:sRepeat];
        [desc setTAddressMode:tRepeat];
        [desc setRAddressMode:tRepeat];
        [desc setCompareFunction:MTLCompareFunctionNever];
        // Anisotropy is only requested for FilterMode 1 on non punch-through polygons.
        if (tsp.FilterMode == 1 && !punchThrough) {
            [desc setMaxAnisotropy:config::AnisotropicFiltering];
        } else {
            [desc setMaxAnisotropy:1];
        }

        id<MTLSamplerState> sampler = [MetalContext::Instance()->GetDevice() newSamplerStateWithDescriptor:desc];
        samplers[hash] = sampler;

        return sampler;
    }

private:
    std::unordered_map<u32, id<MTLSamplerState>> samplers;
};

// Texture cache that also records, per frame index, which textures are
// referenced by work that may still be in flight.
class MetalTextureCache final : public BaseTextureCache<MetalTexture>
{
public:
    MetalTextureCache() {}

    // Switches to another in-flight slot: textures recorded in the slot being
    // left are flagged as no longer in flight and lose their read-only copy,
    // then the slot being entered is emptied (or the slot list is grown).
    void SetCurrentIndex(int index)
    {
        if (index == (int)currentIndex)
            return;
        if (currentIndex < inFlightTextures.size())
            std::for_each(inFlightTextures[currentIndex].begin(), inFlightTextures[currentIndex].end(),
                [](MetalTexture *texture) {
                    texture->SetInFlight(false);
                    texture->readOnlyTexture = nil;
                });
        currentIndex = index;
        EmptyTrash(inFlightTextures);
    }

    // True when the texture is recorded in any slot; with previous == true the
    // current slot is ignored.
    bool IsInFlight(MetalTexture *texture, bool previous)
    {
        for (u32 i = 0; i < inFlightTextures.size(); i++)
            if ((!previous || i != currentIndex)
                    && inFlightTextures[i].find(texture) != inFlightTextures[i].end())
                return true;
        return false;
    }

    // Records the texture in the current slot. SetCurrentIndex() must have been
    // called first so that the slot exists.
    void SetInFlight(MetalTexture *texture)
    {
        texture->SetInFlight(true);
        inFlightTextures[currentIndex].insert(texture);
    }

    void Cleanup();

    void Clear()
    {
        for (auto& set : inFlightTextures)
        {
            for (MetalTexture *tex : set)
                tex->SetInFlight(false);
            set.clear();
        }
        BaseTextureCache::Clear();
    }

private:
    // Removes the texture from every slot, then deletes it.
    bool clearTexture(MetalTexture *tex)
    {
        for (auto& set : inFlightTextures)
            set.erase(tex);

        return tex->Delete();
    }

    template<typename T>
    void EmptyTrash(T& v)
    {
        if (v.size() < currentIndex + 1)
            v.resize(currentIndex + 1);
        else
            v[currentIndex].clear();
    }

    std::vector<std::unordered_set<MetalTexture *>> inFlightTextures;
    u32 currentIndex = ~0;
};
// Selects the Metal pixel format for tex_type, computes the size of the source
// data (including the mipmap chain when it is supplied), (re)creates the
// MTLTexture if its dimensions or format changed, then uploads the pixels.
void MetalTexture::UploadToGPU(int width, int height, const u8 *data, bool mipmapped, bool mipmapsIncluded)
{
    MTLPixelFormat format = MTLPixelFormatInvalid;
    u32 dataSize = width * height * 2;  // 16-bit texels unless adjusted below
    switch (tex_type)
    {
    case TextureType::_5551:
        format = MTLPixelFormatA1BGR5Unorm;
        break;
    case TextureType::_565:
        format = MTLPixelFormatB5G6R5Unorm;
        break;
    case TextureType::_4444:
        format = MTLPixelFormatABGR4Unorm;
        break;
    case TextureType::_8888:
        format = MTLPixelFormatRGBA8Unorm;
        dataSize *= 2;
        break;
    case TextureType::_8:
        format = MTLPixelFormatR8Unorm;
        dataSize /= 2;
        break;
    }
    if (mipmapsIncluded)
    {
        int w = width / 2;
        u32 size = dataSize / 4;
        while (w)
        {
            dataSize += ((size + 3) >> 2) << 2;  // offset must be a multiple of 4
            size /= 4;
            w /= 2;
        }
    }

    if (width != this->width || height != this->height
            || format != this->format || this->texture == nil)
        Init(width, height, format, dataSize, mipmapped, mipmapsIncluded);

    SetImage(dataSize, data, mipmapped && !mipmapsIncluded);
}

// Creates the shared-storage, shader-readable MTLTexture, with a full mipmap
// chain when mipmapped is set.
void MetalTexture::Init(u32 width, u32 height, MTLPixelFormat format, u32 dataSize, bool mipmapped, bool mipmapsIncluded)
{
    this->width = width;
    this->height = height;
    this->format = format;
    mipmapLevels = 1;
    if (mipmapped)
        mipmapLevels += floor(log2(std::max(width, height)));

    MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];

    [desc setWidth:width];
    [desc setHeight:height];
    [desc setPixelFormat:format];
    [desc setMipmapLevelCount:mipmapLevels];
    [desc setStorageMode:MTLStorageModeShared];
    [desc setUsage:MTLTextureUsageShaderRead];

    auto device = MetalContext::Instance()->GetDevice();

    texture = [device newTextureWithDescriptor:desc];
}

// Uploads the pixel data.
// When the source already carries its mipmap chain, every level is uploaded;
// otherwise only level 0 is uploaded and the other levels are generated on the
// GPU if genMipmaps is set.
void MetalTexture::SetImage(u32 srcSize, const void *srcData, bool genMipmaps)
{
    u32 bpp;  // bytes per texel
    switch (tex_type) {
    case TextureType::_8888:
        bpp = 4;
        break;
    case TextureType::_8:
        bpp = 1;
        break;
    default:
        bpp = 2;
        break;
    }

    if (mipmapLevels > 1 && !genMipmaps)
    {
        // The i-th image in the source is uploaded to mip level
        // (mipmapLevels - i - 1), i.e. smallest level first, and is taken to
        // hold (1 << 2i) tightly packed texels. The size therefore has to be
        // computed with the actual texel size, and this path applies to every
        // texture type.
        // NOTE(review): assumes 8- and 32-bit sources use the same packed,
        // smallest-first layout as the 16-bit ones — confirm against the
        // texture decoder.
        const u8 *src = static_cast<const u8 *>(srcData);

        for (u32 i = 0; i < mipmapLevels; i++) {
            const u32 mipLevel = mipmapLevels - i - 1;
            const NSUInteger mipWidth = std::max<NSUInteger>(texture.width >> mipLevel, 1);
            const NSUInteger mipHeight = std::max<NSUInteger>(texture.height >> mipLevel, 1);

            [texture replaceRegion:MTLRegionMake2D(0, 0, mipWidth, mipHeight)
                       mipmapLevel:mipLevel
                         withBytes:src
                       bytesPerRow:mipWidth * bpp];

            src += (1u << (2 * i)) * bpp;
        }
    }
    else
    {
        const NSUInteger rowBytes = texture.width * bpp;

        [texture replaceRegion:MTLRegionMake2D(0, 0, texture.width, texture.height)
                   mipmapLevel:0
                     withBytes:srcData
                   bytesPerRow:rowBytes];

        if (mipmapLevels > 1 && genMipmaps) {
            GenerateMipmaps();
        }
    }
}

// Encodes GPU mipmap generation on the command buffer set with SetCommandBuffer().
void MetalTexture::GenerateMipmaps()
{
    verify((bool)commandBuffer);
    [commandBuffer setLabel:@"Mipmap Generation"];

    id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
    // A texture-to-texture blit copy cannot scale: copying the full extent of
    // level i-1 into the smaller level i is out of bounds and would not filter.
    // Let Metal downsample the whole chain from level 0 instead.
    [blitEncoder generateMipmapsForTexture:texture];
    [blitEncoder endEncoding];
}

// Hands ownership of the GPU textures to a deleter object queued on the flight
// manager; this object is left with nil textures.
void MetalTexture::deferDeleteResource(MetalFlightManager *manager)
{
    class ResourceDeleter : public MetalDeletable
    {
    public:
        ResourceDeleter(MetalTexture *texture)
        {
            std::swap(this->texture, texture->texture);
            std::swap(this->readOnlyTexture, texture->readOnlyTexture);
        }

        ~ResourceDeleter() override {
            [texture setPurgeableState:MTLPurgeableStateEmpty];
            texture = nil;
            [readOnlyTexture setPurgeableState:MTLPurgeableStateEmpty];
            readOnlyTexture = nil;
        }

    private:
        id<MTLTexture> texture = nil;
        id<MTLTexture> readOnlyTexture = nil;
    };
    manager->addToFlight(new ResourceDeleter(this));
}

// Creates a private-storage duplicate of the texture (all mip levels) and
// encodes the copies on the given command buffer. Does nothing when there is
// no texture or a copy already exists.
void MetalTexture::CreateReadOnlyCopy(id<MTLCommandBuffer> commandBuffer)
{
    if (!texture || readOnlyTexture)
        return;

    MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
    [desc setWidth:texture.width];
    [desc setHeight:texture.height];
    [desc setPixelFormat:texture.pixelFormat];
    [desc setMipmapLevelCount:texture.mipmapLevelCount];
    [desc setStorageMode:MTLStorageModePrivate];
    [desc setUsage:MTLTextureUsageShaderRead];

    readOnlyTexture = [texture.device newTextureWithDescriptor:desc];
    [readOnlyTexture setLabel:@"RTT Read-Only Copy"];

    id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];

    // Level 0 has the full extent; every further level halves it, clamped to 1.
    for (NSUInteger level = 0; level < texture.mipmapLevelCount; level++) {
        const NSUInteger mipWidth = MAX(texture.width >> level, 1);
        const NSUInteger mipHeight = MAX(texture.height >> level, 1);

        [blitEncoder copyFromTexture:texture
                         sourceSlice:0
                         sourceLevel:level
                        sourceOrigin:MTLOriginMake(0, 0, 0)
                          sourceSize:MTLSizeMake(mipWidth, mipHeight, 1)
                           toTexture:readOnlyTexture
                    destinationSlice:0
                    destinationLevel:level
                   destinationOrigin:MTLOriginMake(0, 0, 0)];
    }

    [blitEncoder endEncoding];
}

MetalSamplers::MetalSamplers() = default;
MetalSamplers::~MetalSamplers() {
    term();
}

// Deletes a few cache entries whose dirty frame is more than 120 frames old.
// Collection stops once the list holds more than 5 entries.
void MetalTextureCache::Cleanup()
{
    std::vector<u64> staleKeys;

    const u32 targetFrame = std::max((u32)120, FrameCount) - 120;

    // The loop variables are deliberately not called `id`, which is a type
    // name in Objective-C++.
    for (const auto& [key, texture] : cache)
    {
        if (texture.dirty && texture.dirty < targetFrame)
            staleKeys.push_back(key);

        if (staleKeys.size() > 5)
            break;
    }

    for (u64 key : staleKeys)
    {
        if (clearTexture(&cache[key]))
            cache.erase(key);
    }
}
void setFirstProvokingVertex(rend_context& rendContext) { diff --git a/core/rend/transform_matrix.h b/core/rend/transform_matrix.h index d793272725..54936b0914 100644 --- a/core/rend/transform_matrix.h +++ b/core/rend/transform_matrix.h @@ -55,7 +55,7 @@ inline static void getPvrFramebufferSize(const rend_context& rendCtx, int& width // +Y is up in clip, NDC and framebuffer coordinates // Vulkan: // +Y is down in clip, NDC and framebuffer coordinates -// DirectX9: +// DirectX9 & Metal: // +Y is up in clip and NDC coordinates, but down in framebuffer coordinates // Y must also be flipped for render-to-texture so that the top of the texture comes first enum CoordSystem { COORD_OPENGL, COORD_VULKAN, COORD_DIRECTX }; diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index c40fc989be..91e73deffe 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -316,6 +316,7 @@ class VulkanRenderer final : public BaseVulkanRenderer protected: void resize(int w, int h) override { + ERROR_LOG(RENDERER, "VulkanRenderer::resize %d %d", w, h); if ((u32)w == viewport.width && (u32)h == viewport.height) return; BaseVulkanRenderer::resize(w, h); diff --git a/core/sdl/sdl.cpp b/core/sdl/sdl.cpp index a43fa8b5a0..11df4d3ef4 100644 --- a/core/sdl/sdl.cpp +++ b/core/sdl/sdl.cpp @@ -399,6 +399,11 @@ void input_sdl_handle() if (windowFlags & SDL_WINDOW_OPENGL) SDL_GL_GetDrawableSize(window, &settings.display.width, &settings.display.height); else +#endif +#ifdef USE_METAL + if (windowFlags & SDL_WINDOW_METAL) + SDL_Metal_GetDrawableSize(window, &settings.display.width, &settings.display.height); + else #endif SDL_GetWindowSize(window, &settings.display.width, &settings.display.height); GraphicsContext::Instance()->resize(); @@ -700,7 +705,7 @@ bool sdl_recreate_window(u32 flags) } #if !defined(GLES) - flags |= SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI; + flags |= SDL_WINDOW_RESIZABLE; if (window_fullscreen) 
flags |= SDL_WINDOW_FULLSCREEN_DESKTOP; else if (window_maximized) diff --git a/core/types.h b/core/types.h index f33a8caa07..f434429de2 100644 --- a/core/types.h +++ b/core/types.h @@ -102,6 +102,7 @@ enum class RenderType { DirectX9 = 1, DirectX11 = 2, DirectX11_OIT = 6, + Metal = 7 }; static inline bool isOpenGL(RenderType renderType) { @@ -113,6 +114,9 @@ static inline bool isVulkan(RenderType renderType) { static inline bool isDirectX(RenderType renderType) { return renderType == RenderType::DirectX9 || renderType == RenderType::DirectX11 || renderType == RenderType::DirectX11_OIT; } +static inline bool isMetal(RenderType renderType) { + return renderType == RenderType::Metal; +} enum class KeyboardLayout { JP = 1, diff --git a/core/ui/gui.cpp b/core/ui/gui.cpp index 5540f123a3..0024a51e32 100644 --- a/core/ui/gui.cpp +++ b/core/ui/gui.cpp @@ -2198,6 +2198,9 @@ static void gui_settings_video() renderApi = 3; perPixel = true; break; + case RenderType::Metal: + renderApi = 4; + perPixel = false; } constexpr int apiCount = 0 @@ -2212,6 +2215,9 @@ static void gui_settings_video() #endif #ifdef USE_DX11 + 1 + #endif + #ifdef USE_METAL + + 1 #endif ; @@ -2227,7 +2233,7 @@ static void gui_settings_video() #endif #ifdef USE_VULKAN #ifdef __APPLE__ - ImGui::RadioButton("Vulkan (Metal)", &renderApi, 1); + ImGui::RadioButton("Vulkan (MoltenVK)", &renderApi, 1); ImGui::SameLine(0, innerSpacing); ShowHelpMarker("MoltenVK: An implementation of Vulkan that runs on Apple's Metal graphics framework"); #else @@ -2235,6 +2241,12 @@ static void gui_settings_video() #endif // __APPLE__ ImGui::NextColumn(); #endif +#ifdef USE_METAL +#ifdef __APPLE__ + ImGui::RadioButton("Metal", &renderApi, 4); + ImGui::NextColumn(); +#endif +#endif #ifdef USE_DX9 { DisabledScope _(settings.platform.isNaomi2()); @@ -2565,6 +2577,8 @@ static void gui_settings_video() case 3: config::RendererType = perPixel ? 
RenderType::DirectX11_OIT : RenderType::DirectX11; break; + case 4: + config::RendererType = RenderType::Metal; } } diff --git a/core/ui/mainui.cpp b/core/ui/mainui.cpp index 4b1ad55b5b..0cc7a22542 100644 --- a/core/ui/mainui.cpp +++ b/core/ui/mainui.cpp @@ -99,8 +99,8 @@ void mainui_loop(bool forceStart) if (config::RendererType != currentRenderer || forceReinit) { mainui_term(); - int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : currentRenderer == RenderType::DirectX9 ? 2 : 3; - int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : config::RendererType == RenderType::DirectX9 ? 2 : 3; + int prevApi = isOpenGL(currentRenderer) ? 0 : isVulkan(currentRenderer) ? 1 : currentRenderer == RenderType::DirectX9 ? 2 : currentRenderer == RenderType::DirectX11 ? 3 : 4; + int newApi = isOpenGL(config::RendererType) ? 0 : isVulkan(config::RendererType) ? 1 : config::RendererType == RenderType::DirectX9 ? 2 : currentRenderer == RenderType::DirectX11 ? 3 : 4; if (newApi != prevApi || forceReinit) switchRenderApi(); mainui_init(); diff --git a/core/wsi/switcher.cpp b/core/wsi/switcher.cpp index 8d7e4dd23b..075a10a330 100644 --- a/core/wsi/switcher.cpp +++ b/core/wsi/switcher.cpp @@ -30,7 +30,11 @@ VulkanContext theVulkanContext; #endif +#ifdef USE_METAL +#include "rend/metal/metal_context.h" +MetalContext theMetalContext; +#endif GraphicsContext *GraphicsContext::instance; void initRenderApi(void *window, void *display) @@ -69,6 +73,17 @@ void initRenderApi(void *window, void *display) config::RendererType = RenderType::OpenGL; } #endif +#ifdef USE_METAL + if (isMetal(config::RendererType)) + { + theMetalContext.setWindow(window, display); + if (theMetalContext.init()) + return; + // Fall back to OpenGL + WARN_LOG(RENDERER, "Metal init failed. 
Falling back to OpenGL."); + config::RendererType = RenderType::OpenGL; + } +#endif #ifdef USE_OPENGL if (!isOpenGL(config::RendererType)) config::RendererType = RenderType::OpenGL; diff --git a/core/wsi/switcher.mm b/core/wsi/switcher.mm new file mode 100644 index 0000000000..fcce81f5ed --- /dev/null +++ b/core/wsi/switcher.mm @@ -0,0 +1,3 @@ +// When including Metal, switcher needs to be built +// with Obj-C++ to properly resolve types. +#include "switcher.cpp" \ No newline at end of file