From 3b6588b63281b907b513dc2ccc7c51b75bfe19a4 Mon Sep 17 00:00:00 2001
From: Paul Klein
Date: Sat, 27 Dec 2025 19:31:51 -0500
Subject: [PATCH] feat(google-cua): add screenshot pruning to prevent memory growth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GoogleCUAClient now prunes old screenshots from conversation history,
keeping only the most recent maxImages (default: 3) screenshots. This
matches the behavior of MicrosoftCUAClient and prevents unbounded memory
growth during long agent sessions, especially on image-heavy websites.

The maxImages option can be configured via clientOptions.maxImages.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../fix-google-cua-screenshot-pruning.md      |  9 +++
 packages/core/lib/v3/agent/GoogleCUAClient.ts | 75 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 .changeset/fix-google-cua-screenshot-pruning.md

diff --git a/.changeset/fix-google-cua-screenshot-pruning.md b/.changeset/fix-google-cua-screenshot-pruning.md
new file mode 100644
index 000000000..cdf157dfc
--- /dev/null
+++ b/.changeset/fix-google-cua-screenshot-pruning.md
@@ -0,0 +1,9 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Add screenshot pruning to GoogleCUAClient to prevent memory growth
+
+The GoogleCUAClient now prunes old screenshots from conversation history, keeping only the most recent `maxImages` (default: 3) screenshots. This matches the behavior of MicrosoftCUAClient and prevents unbounded memory growth during long agent sessions, especially on image-heavy websites.
+
+The `maxImages` option can be configured via `clientOptions.maxImages` when initializing the agent.
diff --git a/packages/core/lib/v3/agent/GoogleCUAClient.ts b/packages/core/lib/v3/agent/GoogleCUAClient.ts
index 33fb84dfc..6afddb290 100644
--- a/packages/core/lib/v3/agent/GoogleCUAClient.ts
+++ b/packages/core/lib/v3/agent/GoogleCUAClient.ts
@@ -59,6 +59,8 @@ export class GoogleCUAClient extends AgentClient {
   private tools?: ToolSet;
   private baseURL?: string;
   private safetyConfirmationHandler?: SafetyConfirmationHandler;
+  private maxImages: number = 3;
+
   constructor(
     type: AgentType,
     modelName: string,
@@ -93,6 +95,11 @@ export class GoogleCUAClient extends AgentClient {
       this.environment = clientOptions.environment as typeof this.environment;
     }
 
+    // Max images to keep in history (to prevent memory growth)
+    if (clientOptions?.maxImages !== undefined) {
+      this.maxImages = clientOptions.maxImages as number;
+    }
+
     this.generateContentConfig = {
       temperature: 1,
       topP: 0.95,
@@ -641,6 +648,10 @@ export class GoogleCUAClient extends AgentClient {
           role: "user",
           parts: functionResponses,
         });
+
+        // Prune old screenshots to prevent memory growth
+        // Keep only the most recent maxImages screenshots in history
+        this.maybeRemoveOldScreenshots(logger);
       }
     }
 
@@ -667,6 +678,70 @@ export class GoogleCUAClient extends AgentClient {
     }
   }
 
+  /**
+   * Strip screenshots from older history entries to bound memory growth.
+   *
+   * Walks history newest-to-oldest and counts entries whose function
+   * responses carry `inlineData`. The newest `maxImages` such entries are
+   * left untouched; in older ones the `inlineData` parts are dropped and the
+   * rest of the entry is preserved. The limit applies to history entries, not
+   * individual images: an entry holding several function responses keeps all
+   * of its screenshots while it is among the newest `maxImages`.
+   * A `maxImages` that is not a positive number (0, negative, NaN) disables
+   * pruning.
+   *
+   * @param logger - Receives one level-2 line when anything was pruned.
+   */
+  private maybeRemoveOldScreenshots(
+    logger: (message: LogLine) => void,
+  ): void {
+    // Negated comparison so NaN also counts as "disabled".
+    if (!(this.maxImages > 0)) {
+      return;
+    }
+
+    let entriesWithScreenshots = 0;
+    let prunedCount = 0;
+
+    // Traverse history from newest to oldest
+    for (let i = this.history.length - 1; i >= 0; i--) {
+      const entry = this.history[i];
+      if (!entry?.parts) continue;
+
+      // Function responses in this entry that hold at least one screenshot.
+      // Using same pattern as imageCompression.ts for type handling
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const withScreenshots = (entry.parts as any[]).filter((p) =>
+        p.functionResponse?.parts?.some(
+          (pp: { inlineData?: unknown }) => pp.inlineData,
+        ),
+      );
+      if (withScreenshots.length === 0) continue;
+
+      entriesWithScreenshots++;
+      if (entriesWithScreenshots <= this.maxImages) continue;
+
+      // Older than the retention window: drop inlineData, keep other parts.
+      for (const p of withScreenshots) {
+        const parts = p.functionResponse.parts;
+        const kept = parts.filter(
+          (pp: { inlineData?: unknown }) => !pp.inlineData,
+        );
+        // Count every image actually removed so the log line is accurate.
+        prunedCount += parts.length - kept.length;
+        p.functionResponse.parts = kept;
+      }
+    }
+
+    if (prunedCount > 0) {
+      logger({
+        category: "agent",
+        message: `Pruned ${prunedCount} old screenshots from history (keeping ${this.maxImages} most recent)`,
+        level: 2,
+      });
+    }
+  }
+
   /**
    * Process the response from Google's API
    */