diff --git a/Sources/kmsg/Accessibility/UIElement.swift b/Sources/kmsg/Accessibility/UIElement.swift index 5609ed0..c1c6264 100644 --- a/Sources/kmsg/Accessibility/UIElement.swift +++ b/Sources/kmsg/Accessibility/UIElement.swift @@ -150,6 +150,27 @@ public final class UIElement: @unchecked Sendable { return CGRect(origin: pos, size: size) } + public func setPosition(_ point: CGPoint) throws { + var point = point + guard let value = AXValueCreate(.cgPoint, &point) else { + throw AccessibilityError.typeMismatch + } + try setAttribute(kAXPositionAttribute, value: value) + } + + public func setSize(_ size: CGSize) throws { + var size = size + guard let value = AXValueCreate(.cgSize, &size) else { + throw AccessibilityError.typeMismatch + } + try setAttribute(kAXSizeAttribute, value: value) + } + + public func setFrame(_ frame: CGRect) throws { + try setPosition(frame.origin) + try setSize(frame.size) + } + // MARK: - Hierarchy public var parent: UIElement? { diff --git a/Sources/kmsg/Commands/MCPServerCommand.swift b/Sources/kmsg/Commands/MCPServerCommand.swift index a815182..94f79c2 100644 --- a/Sources/kmsg/Commands/MCPServerCommand.swift +++ b/Sources/kmsg/Commands/MCPServerCommand.swift @@ -136,6 +136,7 @@ private final class KmsgMCPServer { private let runner = KmsgSubprocessRunner() private let deepRecoveryDefault: Bool private let traceDefault: Bool + private let readLayoutDefault: String private let serverVersion: String private var initialized = false private var shutdown = false @@ -144,6 +145,7 @@ private final class KmsgMCPServer { let env = ProcessInfo.processInfo.environment deepRecoveryDefault = (env["KMSG_DEFAULT_DEEP_RECOVERY"] ?? "false").lowercased() == "true" traceDefault = (env["KMSG_TRACE_DEFAULT"] ?? "false").lowercased() == "true" + readLayoutDefault = Self.validReadLayout(env["KMSG_DEFAULT_READ_LAYOUT"]) ?? "preserve" serverVersion = env["KMSG_MCP_VERSION"]?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? env["KMSG_MCP_VERSION"]!.trimmingCharacters(in: .whitespacesAndNewlines) : BuildVersion.current @@ -251,6 +253,7 @@ private final class KmsgMCPServer { "type": "object", "properties": [ "chat": ["type": "string", "description": "Chat room or user name"], + "chat_id": ["type": "string", "description": "Synthetic chat_id from kmsg chats"], "limit": ["type": "integer", "minimum": 1, "maximum": 100, "default": 20], "deep_recovery": [ "type": "boolean", @@ -267,8 +270,13 @@ private final class KmsgMCPServer { "default": traceDefault, "description": "Include AX tracing logs", ], + "layout": [ + "type": "string", + "enum": ["preserve", "left", "right"], + "default": readLayoutDefault, + "description": "Window layout before reading", + ], ], - "required": ["chat"], "additionalProperties": false, ], ], @@ -366,11 +374,22 @@ private final class KmsgMCPServer { private func callKmsgRead(_ arguments: JSONDict) -> JSONDict { let chat = String(describing: arguments["chat"] ?? "").trimmingCharacters(in: .whitespacesAndNewlines) - if chat.isEmpty { + let chatID = String(describing: arguments["chat_id"] ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if chat.isEmpty && chatID.isEmpty { return errorPayload( code: "INVALID_ARGUMENT", - message: "chat is required", - hint: "Provide a non-empty chat name.", + message: "chat or chat_id is required", + hint: "Provide either a non-empty chat name or a chat_id from kmsg chats.", + rawStdout: "", + rawStderr: "", + latencyMs: 0 + ) + } + if !chat.isEmpty && !chatID.isEmpty { + return errorPayload( + code: "INVALID_ARGUMENT", + message: "chat and chat_id cannot be used together", + hint: "Use chat_id for stable identity, or chat for name-based lookup.", rawStdout: "", rawStderr: "", latencyMs: 0 @@ -400,8 +419,24 @@ private final class KmsgMCPServer { let deepRecovery = boolValue(arguments["deep_recovery"], defaultValue: deepRecoveryDefault) let keepWindow = boolValue(arguments["keep_window"], defaultValue: false) let traceAX = boolValue(arguments["trace_ax"], defaultValue: traceDefault) + guard let layout = Self.validReadLayout(arguments["layout"] as? String ?? readLayoutDefault) else { + return errorPayload( + code: "INVALID_ARGUMENT", + message: "layout must be preserve, left, or right", + hint: "Use layout=preserve, layout=left, or layout=right.", + rawStdout: "", + rawStderr: "", + latencyMs: 0 + ) + } - var command = ["read", chat, "--json", "--limit", String(boundedLimit)] + var command = ["read"] + if !chatID.isEmpty { + command.append(contentsOf: ["--chat-id", chatID]) + } else { + command.append(chat) + } + command.append(contentsOf: ["--json", "--limit", String(boundedLimit), "--layout", layout]) if deepRecovery { command.append("--deep-recovery") } if keepWindow { command.append("--keep-window") } if traceAX { command.append("--trace-ax") } @@ -465,12 +500,13 @@ private final class KmsgMCPServer { var response: JSONDict = [ "ok": true, - "chat": payload["chat"] ?? chat, + "chat": payload["chat"] ?? (chat.isEmpty ? chatID : chat), "fetched_at": payload["fetched_at"] as Any, "count": payload["count"] ?? 0, "messages": payload["messages"] ?? [], "meta": [ "latency_ms": first.latencyMs, + "layout": layout, ], ] if traceAX, !first.stderr.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { @@ -716,6 +752,17 @@ private final class KmsgMCPServer { return defaultValue } + private static func validReadLayout(_ raw: String?) -> String? { + guard let raw else { return nil } + let normalized = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + switch normalized { + case "preserve", "left", "right": + return normalized + default: + return nil + } + } + private func jsonObject(from string: String) -> JSONDict? { guard let data = string.data(using: .utf8), let object = try? JSONSerialization.jsonObject(with: data), diff --git a/Sources/kmsg/Commands/ReadCommand.swift b/Sources/kmsg/Commands/ReadCommand.swift index 5ec610d..995a1d4 100644 --- a/Sources/kmsg/Commands/ReadCommand.swift +++ b/Sources/kmsg/Commands/ReadCommand.swift @@ -1,6 +1,8 @@ import ArgumentParser import Foundation +extension ChatWindowLayoutMode: ExpressibleByArgument {} + struct ReadCommand: ParsableCommand { private struct ReadJSONResponse: Encodable { let chat: String @@ -19,11 +21,20 @@ struct ReadCommand: ParsableCommand { static let configuration = CommandConfiguration( commandName: "read", abstract: "Read messages from a chat", - discussion: "When author is \"(me)\", the message was sent by you." + discussion: """ + Use either: + kmsg read + kmsg read --chat-id + + When author is "(me)", the message was sent by you. + """ ) + @Option(name: .long, help: "Read using a chat_id from 'kmsg chats'") + var chatID: String? + @Argument(help: "Name of the chat to read from (partial match supported)") - var chat: String + var chat: String? @Option(name: .shortAndLong, help: "Maximum number of messages to show") var limit: Int = 20 @@ -46,9 +57,25 @@ struct ReadCommand: ParsableCommand { ) var deepRecovery: Bool = false + @Option(name: .long, help: "Window layout before reading: preserve, left, or right") + var layout: ChatWindowLayoutMode = .preserve + @Flag(name: .long, help: "Output in JSON format") var json: Bool = false + func validate() throws { + if let chatID, !chatID.isEmpty { + guard chat == nil else { + throw ValidationError("Chat name cannot be provided together with --chat-id.") + } + return + } + + guard let chat, !chat.isEmpty else { + throw ValidationError("Chat name is required unless --chat-id is provided.") + } + } + func run() throws { guard AccessibilityPermission.ensureGranted() else { AccessibilityPermission.printInstructions() @@ -60,15 +87,24 @@ struct ReadCommand: ParsableCommand { let chatWindowResolver = ChatWindowResolver( kakao: kakao, runner: runner, - deepRecoveryEnabled: deepRecovery + deepRecoveryEnabled: deepRecovery, + layoutMode: layout ) let transcriptReader = KakaoTalkTranscriptReader(kakao: kakao, runner: runner) let resolution: ChatWindowResolution + let requestedChat: String do { - resolution = try chatWindowResolver.resolve(query: chat) + if let chatID { + requestedChat = chatID + resolution = try chatWindowResolver.resolve(chatID: chatID) + } else { + let chat = chat ?? "" + requestedChat = chat + resolution = try chatWindowResolver.resolve(query: chat) + } } catch { - print("No chat window found for '\(chat)'") + print("No chat window found for '\(requestedChat)'") print("Reason: \(error)") print("\nAvailable windows:") for (index, window) in kakao.windows.enumerated() { @@ -80,6 +116,11 @@ struct ReadCommand: ParsableCommand { let window = resolution.window if resolution.openedViaSearch { runner.log("read: opening chat via search") + } else if resolution.method == .openedViaChatList { + runner.log("read: opening chat via chat list") + } + + if resolution.openedTransiently { if keepWindow { runner.log("read: keep-window enabled; auto-opened window will be kept") } else { @@ -90,16 +131,16 @@ struct ReadCommand: ParsableCommand { } defer { - if resolution.openedViaSearch && !keepWindow { + if resolution.openedTransiently && !keepWindow { let resolvedTitle = window.title ?? "" - if !resolvedTitle.isEmpty && !resolvedTitle.localizedCaseInsensitiveContains(chat) { + if chatID == nil && !resolvedTitle.isEmpty && !resolvedTitle.localizedCaseInsensitiveContains(requestedChat) { runner.log("read: skipped auto-close because resolved title '\(resolvedTitle)' did not match query") } else if chatWindowResolver.closeWindow(window) { runner.log("read: auto-opened chat window closed") } else { runner.log("read: failed to close auto-opened chat window") } - } else if resolution.openedViaSearch && keepWindow { + } else if resolution.openedTransiently && keepWindow { runner.log("read: auto-opened chat window kept by --keep-window") } } @@ -108,7 +149,7 @@ struct ReadCommand: ParsableCommand { do { snapshot = try transcriptReader.readSnapshot( from: window, - fallbackChatTitle: chat, + fallbackChatTitle: window.title ?? requestedChat, limit: limit ) } catch TranscriptReadError.transcriptContextUnavailable { diff --git a/Sources/kmsg/Commands/SendCommand.swift b/Sources/kmsg/Commands/SendCommand.swift index c72c9e8..0b5102a 100644 --- a/Sources/kmsg/Commands/SendCommand.swift +++ b/Sources/kmsg/Commands/SendCommand.swift @@ -129,14 +129,10 @@ struct SendCommand: ParsableCommand { runner.log("window strategy: focusedWindow -> mainWindow -> windows.first") let resolution: ChatWindowResolution if let chatID { - guard let record = ChatIdentityRegistryStore.shared.record(for: chatID) else { - throw KakaoTalkError.elementNotFound("Unknown chat_id '\(chatID)'. Run 'kmsg chats' first to refresh the local registry.") - } print("Looking for chat with \(targetDescription)...") - print("Resolved \(targetDescription) to '\(record.displayName)'.") - resolution = try chatWindowResolver.resolve(query: record.displayName) - if resolution.openedViaSearch { - print("No existing chat window. Opening via search...") + resolution = try chatWindowResolver.resolve(chatID: chatID) + if resolution.openedTransiently { + print("No existing chat window. Opening via chat list or search...") } else { print("Found existing chat window.") } @@ -144,7 +140,7 @@ struct SendCommand: ParsableCommand { let recipient = recipient ?? "" print("Looking for chat with \(targetDescription)...") resolution = try chatWindowResolver.resolve(query: recipient) - if resolution.openedViaSearch { + if resolution.openedTransiently { print("No existing chat window. Opening via search...") } else { print("Found existing chat window.") diff --git a/Sources/kmsg/Commands/WatchCommand.swift b/Sources/kmsg/Commands/WatchCommand.swift index 8846ec7..3c3b416 100644 --- a/Sources/kmsg/Commands/WatchCommand.swift +++ b/Sources/kmsg/Commands/WatchCommand.swift @@ -100,7 +100,7 @@ struct WatchCommand: ParsableCommand { var currentWindow = resolution.window var currentChatTitle = currentWindow.title ?? chat - var autoOpenedWindow: UIElement? = resolution.openedViaSearch ? currentWindow : nil + var autoOpenedWindow: UIElement? = resolution.openedTransiently ? currentWindow : nil var cachedContext: MessageTranscriptContext? defer { @@ -224,7 +224,7 @@ struct WatchCommand: ParsableCommand { currentWindow = resolution.window currentChatTitle = currentWindow.title ?? chat cachedContext = nil - if resolution.openedViaSearch { + if resolution.openedTransiently { autoOpenedWindow = currentWindow } return try stabilizeBaseline( diff --git a/Sources/kmsg/KakaoTalk/ChatWindowResolver.swift b/Sources/kmsg/KakaoTalk/ChatWindowResolver.swift index 2d76e6c..98bdaf6 100644 --- a/Sources/kmsg/KakaoTalk/ChatWindowResolver.swift +++ b/Sources/kmsg/KakaoTalk/ChatWindowResolver.swift @@ -1,8 +1,15 @@ import ApplicationServices.HIServices import Foundation +enum ChatWindowLayoutMode: String { + case preserve + case left + case right +} + enum ChatWindowResolutionMethod { case existingWindow + case openedViaChatList case openedViaSearch } @@ -13,6 +20,10 @@ struct ChatWindowResolution { var openedViaSearch: Bool { method == .openedViaSearch } + + var openedTransiently: Bool { + method != .existingWindow + } } private enum ChatWindowFailureCode: String { @@ -43,32 +54,69 @@ private struct SearchCandidate { } struct ChatWindowResolver { + private static let minimumReadableWindowSize = CGSize(width: 760, height: 900) + private static let maximumAutomaticWindowSize = CGSize(width: 1200, height: 1000) + private let kakao: KakaoTalkApp private let runner: AXActionRunner private let useCache: Bool private let deepRecoveryEnabled: Bool + private let layoutMode: ChatWindowLayoutMode init( kakao: KakaoTalkApp, runner: AXActionRunner, useCache: Bool = true, - deepRecoveryEnabled: Bool = false + deepRecoveryEnabled: Bool = false, + layoutMode: ChatWindowLayoutMode = .preserve ) { self.kakao = kakao self.runner = runner self.useCache = useCache self.deepRecoveryEnabled = deepRecoveryEnabled + self.layoutMode = layoutMode } func resolve(query: String) throws -> ChatWindowResolution { let usableWindow = try requireUsableWindow() if let existingWindow = findMatchingChatWindow(in: kakao.windows, query: query) { + standardizeReadableWindow(existingWindow, label: "existing chat window") return ChatWindowResolution(window: existingWindow, method: .existingWindow) } let searchWindow = selectSearchWindow(fallback: usableWindow) + standardizeReadableWindow(searchWindow, label: "search root window") let chatWindow = try openChatViaSearch(query: query, in: searchWindow, fallbackWindow: usableWindow) + standardizeReadableWindow(chatWindow, label: "opened chat window") + return ChatWindowResolution(window: chatWindow, method: .openedViaSearch) + } + + func resolve(chatID: String) throws -> ChatWindowResolution { + guard let record = ChatIdentityRegistryStore.shared.record(for: chatID) else { + throw KakaoTalkError.elementNotFound("Unknown chat_id '\(chatID)'. Run 'kmsg chats' first to refresh the local registry.") + } + + let usableWindow = try requireUsableWindow() + let query = record.displayName + + if let existingWindow = findMatchingChatWindow(in: kakao.windows, query: query) { + standardizeReadableWindow(existingWindow, label: "existing chat window") + return ChatWindowResolution(window: existingWindow, method: .existingWindow) + } + + if let chatListWindow = kakao.chatListWindow, + let chatWindow = openChatListRow(chatID: chatID, query: query, in: chatListWindow, fallbackWindow: usableWindow) + { + standardizeReadableWindow(chatWindow, label: "opened chat window") + return ChatWindowResolution(window: chatWindow, method: .openedViaChatList) + } + + runner.log("chat_id: falling back to search for '\(query)'") + let searchWindow = selectSearchWindow(fallback: usableWindow) + standardizeReadableWindow(searchWindow, label: "search root window") + let chatWindow = try openChatViaSearch(query: query, in: searchWindow, fallbackWindow: usableWindow) + standardizeReadableWindow(chatWindow, label: "opened chat window") return ChatWindowResolution(window: chatWindow, method: .openedViaSearch) } @@ -229,6 +277,181 @@ struct ChatWindowResolver { throw KakaoTalkError.windowNotFound("[\(ChatWindowFailureCode.windowNotReady.rawValue)] Chat window for '\(query)' did not open") } + private func openChatListRow(chatID: String, query: String, in chatListWindow: UIElement, fallbackWindow: UIElement) -> UIElement? { + runner.log("chat_id: scanning chat list rows") + standardizeReadableWindow(chatListWindow, label: "chat list window") + let scanner = ChatListScanner() + let snapshots = scanner.scan(in: chatListWindow, limit: 200, trace: { message in + runner.log(message) + }) + guard !snapshots.isEmpty else { + runner.log("chat_id: chat list scan returned no rows") + return nil + } + + let registry = ChatIdentityRegistryStore.shared + let assignedIDs = registry.assignChatIDs(for: snapshots.map(\.discovery)) + guard let matchIndex = assignedIDs.firstIndex(of: chatID) else { + runner.log("chat_id: no visible chat row matched \(chatID)") + return nil + } + + let row = snapshots[matchIndex].element + runner.log("chat_id: matched row title='\(snapshots[matchIndex].discovery.title)'") + kakao.activate() + _ = tryRaiseWindow(chatListWindow) + + if triggerChatListRowOpen(row) { + if let window = waitForOpenedChatWindow(query: query, fallbackWindow: fallbackWindow) { + return window + } + } + + runner.log("chat_id: matched row did not open a chat window") + return nil + } + + private func triggerChatListRowOpen(_ row: UIElement) -> Bool { + if tryActivateSearchResult(row, label: "chat list row") { + return true + } + + let selected = trySelectSearchResult(row, label: "chat list row") + if !selected, let parent = row.parent, trySelectSearchResult(parent, label: "chat list row.parent") { + runner.pressEnterKey() + return true + } + + if selected { + runner.pressEnterKey() + return true + } + + return false + } + + private func standardizeReadableWindow(_ window: UIElement, label: String) { + kakao.activate() + _ = tryRaiseWindow(window) + + guard let currentSize = window.size else { + runner.log("\(label): size unavailable; skipping resize") + return + } + let currentFrame = window.frame + + let targetSize = readableTargetSize(for: currentSize) + guard targetSize != currentSize else { + runner.log("\(label): size already readable \(Int(currentSize.width))x\(Int(currentSize.height))") + if let layoutFrame = automaticLayoutFrame(for: window, preferredSize: targetSize, currentFrame: currentFrame) { + applyWindowFrame(layoutFrame, to: window, label: label) + } + return + } + + if let layoutFrame = automaticLayoutFrame(for: window, preferredSize: targetSize, currentFrame: currentFrame) { + applyWindowFrame(layoutFrame, to: window, label: label) + } else { + do { + try window.setSize(targetSize) + if let currentPosition = currentFrame?.origin { + try? window.setPosition(currentPosition) + } + runner.log("\(label): resized to \(Int(targetSize.width))x\(Int(targetSize.height))") + Thread.sleep(forTimeInterval: 0.08) + } catch { + runner.log("\(label): resize failed (\(error))") + } + } + } + + private func readableTargetSize(for currentSize: CGSize) -> CGSize { + CGSize( + width: readableTargetDimension( + current: currentSize.width, + minimum: Self.minimumReadableWindowSize.width, + automaticMaximum: Self.maximumAutomaticWindowSize.width + ), + height: readableTargetDimension( + current: currentSize.height, + minimum: Self.minimumReadableWindowSize.height, + automaticMaximum: Self.maximumAutomaticWindowSize.height + ) + ) + } + + private func readableTargetDimension(current: CGFloat, minimum: CGFloat, automaticMaximum: CGFloat) -> CGFloat { + if current >= automaticMaximum { + return current + } + + return max(current, minimum) + } + + private func automaticLayoutFrame( + for window: UIElement, + preferredSize: CGSize, + currentFrame: CGRect? + ) -> CGRect? { + guard layoutMode != .preserve else { + return nil + } + + let currentFrame = currentFrame ?? window.frame ?? CGRect(origin: .zero, size: preferredSize) + let screenFrame = screenFrame(containing: currentFrame) ?? CGDisplayBounds(CGMainDisplayID()) + let usableFrame = screenFrame.insetBy(dx: 24, dy: 24) + guard usableFrame.width > 0, usableFrame.height > 0 else { + return nil + } + + let layoutSize = CGSize( + width: min( + max(preferredSize.width, Self.minimumReadableWindowSize.width), + min(Self.maximumAutomaticWindowSize.width, usableFrame.width) + ), + height: min( + max(preferredSize.height, Self.minimumReadableWindowSize.height), + min(Self.maximumAutomaticWindowSize.height, usableFrame.height) + ) + ) + let x = layoutMode == .right ? usableFrame.maxX - layoutSize.width : usableFrame.minX + let y = min(max(currentFrame.minY, usableFrame.minY), usableFrame.maxY - layoutSize.height) + return CGRect(origin: CGPoint(x: x, y: y), size: layoutSize) + } + + private func screenFrame(containing frame: CGRect) -> CGRect? { + var displayCount: UInt32 = 0 + guard CGGetActiveDisplayList(0, nil, &displayCount) == .success, displayCount > 0 else { + return nil + } + + var displays = [CGDirectDisplayID](repeating: 0, count: Int(displayCount)) + guard CGGetActiveDisplayList(displayCount, &displays, &displayCount) == .success else { + return nil + } + + let referencePoint = CGPoint(x: frame.midX, y: frame.midY) + return displays + .map(CGDisplayBounds) + .first { $0.contains(referencePoint) } + } + + private func applyWindowFrame(_ frame: CGRect, to window: UIElement, label: String) { + do { + try window.setFrame(frame) + runner.log("\(label): laid out \(Int(frame.width))x\(Int(frame.height)) at \(Int(frame.minX)),\(Int(frame.minY))") + Thread.sleep(forTimeInterval: 0.08) + } catch { + runner.log("\(label): layout failed (\(error)); falling back to size-only resize") + do { + try window.setSize(frame.size) + Thread.sleep(forTimeInterval: 0.08) + } catch { + runner.log("\(label): resize fallback failed (\(error))") + } + } + } + private func resolveCachedElement( slot: AXPathSlot, root: UIElement,