瀏覽代碼

Merge pull request #104 from nanobrowser/minorfix

Minorfix
Ashu 3 月之前
父節點
當前提交
af5a4e4f8a

+ 1 - 0
README-es.md

@@ -147,6 +147,7 @@ Nanobrowser te permite configurar distintos modelos LLM para cada agente para eq
   - Sin costos de API y con privacidad total, sin datos que salgan de tu máquina
 
 - **Modelos Recomendados**:
+  - **Qwen3 14B**
   - **Falcon3 10B**
   - **Qwen 2.5 Coder 14B**
   - **Mistral Small 24B**

+ 1 - 0
README-zh-Hant.md

@@ -147,6 +147,7 @@ Nanobrowser 允許您為每個代理配置不同的 LLM 模型,以平衡性能
   - 零 API 成本且完全隱私,所有資料都在您的電腦上
 
 - **推薦模型**:
+  - **Qwen3 14B**
   - **Falcon3 10B**
   - **Qwen 2.5 Coder 14B**
   - **Mistral Small 24B**

+ 1 - 0
README.md

@@ -147,6 +147,7 @@ Nanobrowser allows you to configure different LLM models for each agent to balan
   - Zero API costs and complete privacy with no data leaving your machine
 
 - **Recommended Models**:
+  - **Qwen3 14B**
   - **Falcon3 10B**
   - **Qwen 2.5 Coder 14B**
   - **Mistral Small 24B**

+ 1 - 0
chrome-extension/src/background/agent/actions/builder.ts

@@ -22,6 +22,7 @@ import {
 import { z } from 'zod';
 import { createLogger } from '@src/background/log';
 import { ExecutionState, Actors } from '../event/types';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 
 const logger = createLogger('Action');
 

+ 37 - 18
chrome-extension/src/background/agent/actions/json_gemini.ts

@@ -25,6 +25,8 @@ export const geminiNavigatorOutputSchema = {
         properties: {
           done: {
             type: 'object',
+            description: 'Complete task',
+            nullable: true,
             properties: {
               text: {
                 type: 'string',
@@ -34,10 +36,12 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['text', 'success'],
-            nullable: true,
           },
           search_google: {
             type: 'object',
+            description:
              'Search the query in Google in the current tab, the query should be a search query like humans search in Google, concrete and not vague or super long. Prefer the single most important item.',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -48,10 +52,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'query'],
-            nullable: true,
           },
           go_to_url: {
             type: 'object',
+            description: 'Navigate to URL in the current tab',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -62,10 +67,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'url'],
-            nullable: true,
           },
           go_back: {
             type: 'object',
+            description: 'Go back to previous page',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -73,10 +79,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent'],
-            nullable: true,
           },
           wait: {
             type: 'object',
+            description: 'Wait for x seconds default 3',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -87,10 +94,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'seconds'],
-            nullable: true,
           },
           click_element: {
             type: 'object',
+            description: 'Click element by index',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -105,10 +113,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'index'],
-            nullable: true,
           },
           input_text: {
             type: 'object',
+            description: 'Input text into an interactive input element',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -126,10 +135,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'index', 'text'],
-            nullable: true,
           },
           switch_tab: {
             type: 'object',
+            description: 'Switch tab',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -140,10 +150,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'tab_id'],
-            nullable: true,
           },
           open_tab: {
             type: 'object',
+            description: 'Open url in new tab',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -154,10 +165,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'url'],
-            nullable: true,
           },
           close_tab: {
             type: 'object',
+            description: 'Close tab by tab_id',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -168,10 +180,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'tab_id'],
-            nullable: true,
           },
           cache_content: {
             type: 'object',
+            description: 'Cache what you have found so far from the current page for future use',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -182,10 +195,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'content'],
-            nullable: true,
           },
           scroll_down: {
             type: 'object',
+            description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -197,10 +211,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'amount'],
-            nullable: true,
           },
           scroll_up: {
             type: 'object',
+            description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -212,10 +227,12 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'amount'],
-            nullable: true,
           },
           send_keys: {
             type: 'object',
+            description:
+              'Send strings of special keys like Escape, Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press.',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -226,10 +243,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'keys'],
-            nullable: true,
           },
           scroll_to_text: {
             type: 'object',
+            description: 'If you dont find something which you want to interact with, scroll to it',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -240,10 +258,11 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'text'],
-            nullable: true,
           },
           get_dropdown_options: {
             type: 'object',
+            description: 'Get all options from a native dropdown',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -254,10 +273,12 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'index'],
-            nullable: true,
           },
           select_dropdown_option: {
             type: 'object',
+            description:
+              'Select dropdown option for interactive element index by the text of the option you want to select',
+            nullable: true,
             properties: {
               intent: {
                 type: 'string',
@@ -271,10 +292,8 @@ export const geminiNavigatorOutputSchema = {
               },
             },
             required: ['intent', 'index', 'text'],
-            nullable: true,
           },
         },
-        required: [],
       },
     },
   },

+ 24 - 0
chrome-extension/src/background/agent/actions/json_schema.ts

@@ -26,6 +26,7 @@ export const jsonNavigatorOutputSchema = {
       items: {
         properties: {
           done: {
+            description: 'Complete task',
             properties: {
               text: {
                 title: 'Text',
@@ -42,6 +43,8 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           search_google: {
+            description:
              'Search the query in Google in the current tab, the query should be a search query like humans search in Google, concrete and not vague or super long. Prefer the single most important item.',
             properties: {
               intent: {
                 title: 'Intent',
@@ -59,6 +62,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           go_to_url: {
+            description: 'Navigate to URL in the current tab',
             properties: {
               intent: {
                 title: 'Intent',
@@ -76,6 +80,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           go_back: {
+            description: 'Go back to previous page',
             properties: {
               intent: {
                 title: 'Intent',
@@ -89,6 +94,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           wait: {
+            description: 'Wait for x seconds default 3',
             properties: {
               intent: {
                 title: 'Intent',
@@ -107,6 +113,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           click_element: {
+            description: 'Click element by index',
             properties: {
               intent: {
                 title: 'Intent',
@@ -118,6 +125,7 @@ export const jsonNavigatorOutputSchema = {
                 type: 'integer',
               },
               xpath: {
+                title: 'Xpath',
                 type: 'string',
                 nullable: true,
               },
@@ -128,6 +136,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           input_text: {
+            description: 'Input text into an interactive input element',
             properties: {
               intent: {
                 title: 'Intent',
@@ -143,6 +152,7 @@ export const jsonNavigatorOutputSchema = {
                 type: 'string',
               },
               xpath: {
+                title: 'Xpath',
                 type: 'string',
                 nullable: true,
               },
@@ -153,6 +163,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           switch_tab: {
+            description: 'Switch tab',
             properties: {
               intent: {
                 title: 'Intent',
@@ -170,6 +181,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           open_tab: {
+            description: 'Open url in new tab',
             properties: {
               intent: {
                 title: 'Intent',
@@ -187,6 +199,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           close_tab: {
+            description: 'Close tab by tab_id',
             properties: {
               intent: {
                 title: 'Intent',
@@ -204,6 +217,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           cache_content: {
+            description: 'Cache what you have found so far from the current page for future use',
             properties: {
               intent: {
                 title: 'Intent',
@@ -221,6 +235,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           scroll_down: {
+            description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
             properties: {
               intent: {
                 title: 'Intent',
@@ -228,6 +243,7 @@ export const jsonNavigatorOutputSchema = {
                 description: 'purpose of this action',
               },
               amount: {
+                title: 'Amount',
                 type: 'integer',
                 nullable: true,
               },
@@ -238,6 +254,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           scroll_up: {
+            description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
             properties: {
               intent: {
                 title: 'Intent',
@@ -245,6 +262,7 @@ export const jsonNavigatorOutputSchema = {
                 description: 'purpose of this action',
               },
               amount: {
+                title: 'Amount',
                 type: 'integer',
                 nullable: true,
               },
@@ -255,6 +273,8 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           send_keys: {
+            description:
+              'Send strings of special keys like Escape, Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press.',
             properties: {
               intent: {
                 title: 'Intent',
@@ -272,6 +292,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           scroll_to_text: {
+            description: 'If you dont find something which you want to interact with, scroll to it',
             properties: {
               intent: {
                 title: 'Intent',
@@ -289,6 +310,7 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           get_dropdown_options: {
+            description: 'Get all options from a native dropdown',
             properties: {
               intent: {
                 title: 'Intent',
@@ -306,6 +328,8 @@ export const jsonNavigatorOutputSchema = {
             nullable: true,
           },
           select_dropdown_option: {
+            description:
+              'Select dropdown option for interactive element index by the text of the option you want to select',
             properties: {
               intent: {
                 title: 'Intent',

+ 2 - 0
chrome-extension/src/background/agent/agents/validator.ts

@@ -58,6 +58,8 @@ export class ValidatorAgent extends BaseAgent<typeof validatorOutputSchema, Vali
         const mergedMessage = new HumanMessage(`${stateMessage.content}\n\nThe current plan is: \n${this.plan}`);
         stateMessage = mergedMessage;
       }
+      // logger.info('validator input', stateMessage);
+
       const systemMessage = this.prompt.getSystemMessage();
       const inputMessages = [systemMessage, stateMessage];
 

+ 6 - 4
chrome-extension/src/background/agent/executor.ts

@@ -137,17 +137,19 @@ export class Executor {
         if (this.planner && (context.nSteps % context.options.planningInterval === 0 || validatorFailed)) {
           validatorFailed = false;
           // The first planning step is special, we don't want to add the browser state message to memory
+          let positionForPlan = 0;
           if (this.tasks.length > 1 || step > 0) {
             await this.navigator.addStateMessageToMemory();
+            positionForPlan = this.context.messageManager.length() - 1;
+          } else {
+            positionForPlan = this.context.messageManager.length();
           }
 
           const planOutput = await this.planner.execute();
           if (planOutput.result) {
             logger.info(`🔄 Planner output: ${JSON.stringify(planOutput.result, null, 2)}`);
-            this.context.messageManager.addPlan(
-              JSON.stringify(planOutput.result),
-              this.context.messageManager.length() - 1,
-            );
+            this.context.messageManager.addPlan(JSON.stringify(planOutput.result), positionForPlan);
+
             if (planOutput.result.done) {
               // task is complete, skip navigation
               done = true;

+ 87 - 162
chrome-extension/src/background/agent/helper.ts

@@ -10,25 +10,23 @@ import { ChatDeepSeek } from '@langchain/deepseek';
 const maxTokens = 1024 * 4;
 
 function isOpenAIOModel(modelName: string): boolean {
-  return modelName.startsWith('openai/o') || modelName.startsWith('o');
+  if (modelName.startsWith('openai/')) {
+    return modelName.startsWith('openai/o');
+  }
+  return modelName.startsWith('o');
 }
 
 function createOpenAIChatModel(
   providerConfig: ProviderConfig,
   modelConfig: ModelConfig,
   // Add optional extra fetch options for headers etc.
-  extraFetchOptions?: { headers?: Record<string, string> },
+  extraFetchOptions: { headers?: Record<string, string> } | undefined,
 ): BaseChatModel {
   const args: {
     model: string;
     apiKey?: string;
     // Configuration should align with ClientOptions from @langchain/openai
-    configuration?: {
-      baseURL?: string;
-      defaultHeaders?: Record<string, string>;
-      // Add other ClientOptions if needed, e.g.?
-      // dangerouslyAllowBrowser?: boolean;
-    };
+    configuration?: Record<string, unknown>;
     modelKwargs?: {
       max_completion_tokens: number;
       reasoning_effort?: 'low' | 'medium' | 'high';
@@ -39,29 +37,16 @@ function createOpenAIChatModel(
   } = {
     model: modelConfig.modelName,
     apiKey: providerConfig.apiKey,
-    // Initialize configuration object
-    configuration: {},
   };
 
+  const configuration: Record<string, unknown> = {};
   if (providerConfig.baseUrl) {
-    // Set baseURL inside configuration
-    args.configuration!.baseURL = providerConfig.baseUrl;
+    configuration.baseURL = providerConfig.baseUrl;
   }
-
-  // Always add custom headers for OpenRouter to identify Nanobrowser
-  if (providerConfig.type === ProviderTypeEnum.OpenRouter) {
-    args.configuration!.defaultHeaders = {
-      ...(args.configuration!.defaultHeaders || {}),
-      'HTTP-Referer': 'https://nanobrowser.ai',
-      'X-Title': 'Nanobrowser',
-      ...(extraFetchOptions?.headers || {}),
-    };
-  } else if (extraFetchOptions?.headers) {
-    args.configuration!.defaultHeaders = {
-      ...(args.configuration!.defaultHeaders || {}),
-      ...extraFetchOptions.headers,
-    };
+  if (extraFetchOptions?.headers) {
+    configuration.defaultHeaders = extraFetchOptions.headers;
   }
+  args.configuration = configuration;
 
   // custom provider may have no api key
   if (providerConfig.apiKey) {
@@ -83,8 +68,6 @@ function createOpenAIChatModel(
     args.temperature = (modelConfig.parameters?.temperature ?? 0.1) as number;
     args.maxTokens = maxTokens;
   }
-  // Log args being passed to ChatOpenAI constructor inside the helper
-  console.log('[createOpenAIChatModel] Args passed to new ChatOpenAI:', args);
   return new ChatOpenAI(args);
 }
 
@@ -108,6 +91,75 @@ function isAzureProvider(providerId: string): boolean {
   return providerId === ProviderTypeEnum.AzureOpenAI || providerId.startsWith(`${ProviderTypeEnum.AzureOpenAI}_`);
 }
 
+// Function to create an Azure OpenAI chat model
+function createAzureChatModel(providerConfig: ProviderConfig, modelConfig: ModelConfig): BaseChatModel {
+  const temperature = (modelConfig.parameters?.temperature ?? 0.1) as number;
+  const topP = (modelConfig.parameters?.topP ?? 0.1) as number;
+
+  // Validate necessary fields first
+  if (
+    !providerConfig.baseUrl ||
+    !providerConfig.azureDeploymentNames ||
+    providerConfig.azureDeploymentNames.length === 0 ||
+    !providerConfig.azureApiVersion ||
+    !providerConfig.apiKey
+  ) {
+    throw new Error(
+      'Azure configuration is incomplete. Endpoint, Deployment Name, API Version, and API Key are required. Please check settings.',
+    );
+  }
+
+  // Instead of always using the first deployment name, use the model name from modelConfig
+  // which contains the actual model selected in the UI
+  const deploymentName = modelConfig.modelName;
+
+  // Validate that the selected model exists in the configured deployments
+  if (!providerConfig.azureDeploymentNames.includes(deploymentName)) {
+    console.warn(
+      `[createChatModel] Selected deployment "${deploymentName}" not found in available deployments. ` +
+        `Available: ${JSON.stringify(providerConfig.azureDeploymentNames)}. Using the model anyway.`,
+    );
+  }
+
+  // Extract instance name from the endpoint URL
+  const instanceName = extractInstanceNameFromUrl(providerConfig.baseUrl);
+  if (!instanceName) {
+    throw new Error(
+      `Could not extract Instance Name from Azure Endpoint URL: ${providerConfig.baseUrl}. Expected format like https://<your-instance-name>.openai.azure.com/`,
+    );
+  }
+
+  // Check if the Azure deployment is using an "o" series model (GPT-4o, etc.)
+  const isOSeriesModel = isOpenAIOModel(deploymentName);
+
+  // Use AzureChatOpenAI with specific parameters
+  const args = {
+    azureOpenAIApiInstanceName: instanceName, // Derived from endpoint
+    azureOpenAIApiDeploymentName: deploymentName,
+    azureOpenAIApiKey: providerConfig.apiKey,
+    azureOpenAIApiVersion: providerConfig.azureApiVersion,
+    // For Azure, the model name should be the deployment name itself
+    model: deploymentName, // Set model = deployment name to fix Azure requests
+    // For O series models, use modelKwargs instead of temperature/topP
+    ...(isOSeriesModel
+      ? {
+          modelKwargs: {
+            max_completion_tokens: maxTokens,
+            // Add reasoning_effort parameter for Azure o-series models if specified
+            ...(modelConfig.reasoningEffort ? { reasoning_effort: modelConfig.reasoningEffort } : {}),
+          },
+        }
+      : {
+          temperature,
+          topP,
+          maxTokens,
+        }),
+    // DO NOT pass baseUrl or configuration here
+  };
+  // console.log('[createChatModel] Azure args passed to AzureChatOpenAI:', args);
+  return new AzureChatOpenAI(args);
+}
+
 // create a chat model based on the agent name, the model name and provider
 export function createChatModel(providerConfig: ProviderConfig, modelConfig: ModelConfig): BaseChatModel {
   const temperature = (modelConfig.parameters?.temperature ?? 0.1) as number;
@@ -116,6 +168,11 @@ export function createChatModel(providerConfig: ProviderConfig, modelConfig: Mod
   // Check if the provider is an Azure provider with a custom ID (e.g. azure_openai_2)
   const isAzure = isAzureProvider(modelConfig.provider);
 
+  // If this is any type of Azure provider, handle it with the dedicated function
+  if (isAzure) {
+    return createAzureChatModel(providerConfig, modelConfig);
+  }
+
   switch (modelConfig.provider) {
     case ProviderTypeEnum.OpenAI: {
       // Call helper without extra options
@@ -187,151 +244,19 @@ export function createChatModel(providerConfig: ProviderConfig, modelConfig: Mod
       };
       return new ChatOllama(args);
     }
-    case ProviderTypeEnum.AzureOpenAI: {
-      // Validate necessary fields first
-      if (
-        !providerConfig.baseUrl ||
-        !providerConfig.azureDeploymentNames ||
-        providerConfig.azureDeploymentNames.length === 0 ||
-        !providerConfig.azureApiVersion ||
-        !providerConfig.apiKey
-      ) {
-        throw new Error(
-          'Azure configuration is incomplete. Endpoint, Deployment Name, API Version, and API Key are required. Please check settings.',
-        );
-      }
-
-      // Instead of always using the first deployment name, use the model name from modelConfig
-      // which contains the actual model selected in the UI
-      const deploymentName = modelConfig.modelName;
-
-      // Validate that the selected model exists in the configured deployments
-      if (!providerConfig.azureDeploymentNames.includes(deploymentName)) {
-        console.warn(
-          `[createChatModel] Selected deployment "${deploymentName}" not found in available deployments. ` +
-            `Available: ${JSON.stringify(providerConfig.azureDeploymentNames)}. Using the model anyway.`,
-        );
-      }
-
-      // Extract instance name from the endpoint URL
-      const instanceName = extractInstanceNameFromUrl(providerConfig.baseUrl);
-      if (!instanceName) {
-        throw new Error(
-          `Could not extract Instance Name from Azure Endpoint URL: ${providerConfig.baseUrl}. Expected format like https://<your-instance-name>.openai.azure.com/`,
-        );
-      }
-
-      // Check if the Azure deployment is using an "o" series model (GPT-4o, etc.)
-      const isOSeriesModel = isOpenAIOModel(deploymentName);
-
-      // Use AzureChatOpenAI with specific parameters
-      const args = {
-        azureOpenAIApiInstanceName: instanceName, // Derived from endpoint
-        azureOpenAIApiDeploymentName: deploymentName,
-        azureOpenAIApiKey: providerConfig.apiKey,
-        azureOpenAIApiVersion: providerConfig.azureApiVersion,
-        // For Azure, the model name should be the deployment name itself
-        model: deploymentName, // Set model = deployment name to fix Azure requests
-        // For O series models, use modelKwargs instead of temperature/topP
-        ...(isOSeriesModel
-          ? {
-              modelKwargs: {
-                max_completion_tokens: maxTokens,
-                // Add reasoning_effort parameter for Azure o-series models if specified
-                ...(modelConfig.reasoningEffort ? { reasoning_effort: modelConfig.reasoningEffort } : {}),
-              },
-            }
-          : {
-              temperature,
-              topP,
-              maxTokens,
-            }),
-        // DO NOT pass baseUrl or configuration here
-      };
-      console.log('[createChatModel] Azure args passed to AzureChatOpenAI:', args);
-      return new AzureChatOpenAI(args);
-    }
     case ProviderTypeEnum.OpenRouter: {
       // Call the helper function, passing OpenRouter headers via the third argument
       console.log('[createChatModel] Calling createOpenAIChatModel for OpenRouter');
       return createOpenAIChatModel(providerConfig, modelConfig, {
         headers: {
-          'HTTP-Referer': 'nanobrowser-extension',
-          'X-Title': 'NanoBrowser Extension',
+          'HTTP-Referer': 'https://nanobrowser.ai',
+          'X-Title': 'Nanobrowser',
         },
       });
     }
     default: {
-      // Check if this is a custom Azure provider (azure_openai_X)
-      if (isAzure) {
-        // Validate necessary fields first
-        if (
-          !providerConfig.baseUrl ||
-          !providerConfig.azureDeploymentNames ||
-          providerConfig.azureDeploymentNames.length === 0 ||
-          !providerConfig.azureApiVersion ||
-          !providerConfig.apiKey
-        ) {
-          throw new Error(
-            'Azure configuration is incomplete. Endpoint, Deployment Name, API Version, and API Key are required. Please check settings.',
-          );
-        }
-
-        // Instead of always using the first deployment name, use the model name from modelConfig
-        // which contains the actual model selected in the UI
-        const deploymentName = modelConfig.modelName;
-
-        // Validate that the selected model exists in the configured deployments
-        if (!providerConfig.azureDeploymentNames.includes(deploymentName)) {
-          console.warn(
-            `[createChatModel] Selected deployment "${deploymentName}" not found in available deployments. ` +
-              `Available: ${JSON.stringify(providerConfig.azureDeploymentNames)}. Using the model anyway.`,
-          );
-        }
-
-        // Extract instance name from the endpoint URL
-        const instanceName = extractInstanceNameFromUrl(providerConfig.baseUrl);
-        if (!instanceName) {
-          throw new Error(
-            `Could not extract Instance Name from Azure Endpoint URL: ${providerConfig.baseUrl}. Expected format like https://<your-instance-name>.openai.azure.com/`,
-          );
-        }
-
-        // Check if the Azure deployment is using an "o" series model (GPT-4o, etc.)
-        const isOSeriesModel = isOpenAIOModel(deploymentName);
-
-        // Use AzureChatOpenAI with specific parameters
-        const args = {
-          azureOpenAIApiInstanceName: instanceName, // Derived from endpoint
-          azureOpenAIApiDeploymentName: deploymentName,
-          azureOpenAIApiKey: providerConfig.apiKey,
-          azureOpenAIApiVersion: providerConfig.azureApiVersion,
-          // For Azure, the model name should be the deployment name itself
-          model: deploymentName, // Set model = deployment name to fix Azure requests
-          // For O series models, use modelKwargs instead of temperature/topP
-          ...(isOSeriesModel
-            ? {
-                modelKwargs: {
-                  max_completion_tokens: maxTokens,
-                  // Add reasoning_effort parameter for Azure o-series models if specified
-                  ...(modelConfig.reasoningEffort ? { reasoning_effort: modelConfig.reasoningEffort } : {}),
-                },
-              }
-            : {
-                temperature,
-                topP,
-                maxTokens,
-              }),
-          // DO NOT pass baseUrl or configuration here
-        };
-        console.log('[createChatModel] Azure args (custom ID) passed to AzureChatOpenAI:', args);
-        return new AzureChatOpenAI(args);
-      }
-
-      // If not Azure, handles CustomOpenAI
       // by default, we think it's a openai-compatible provider
       // Pass undefined for extraFetchOptions for default/custom cases
-      console.log('[createChatModel] Calling createOpenAIChatModel for default/custom provider');
       return createOpenAIChatModel(providerConfig, modelConfig, undefined);
     }
   }

+ 1 - 1
chrome-extension/src/background/agent/messages/service.ts

@@ -167,7 +167,7 @@ export default class MessageManager {
    */
   public addPlan(plan?: string, position?: number): void {
     if (plan) {
-      const msg = new AIMessage({ content: plan });
+      const msg = new AIMessage({ content: `<plan>${plan}</plan>` });
       this.addMessageWithTokens(msg, null, position);
     }
   }

+ 28 - 18
chrome-extension/src/background/agent/prompts/templates/navigator.ts

@@ -80,28 +80,38 @@ Common action sequences:
 
 9. Extraction:
 
-- When searching for information or conducting research:
-  1. First analyze and extract relevant content from the current visible state
-  2. If the needed information is incomplete:
-     - Use cache_content action to cache the current findings
-     - Scroll down EXACTLY ONE PAGE at a time using scroll_page action
-     - NEVER scroll more than one page at once as this will cause loss of information
-     - Repeat the analyze-cache-scroll cycle until either:
-       * All required information is found, or
-       * Maximum 5 page scrolls have been performed
-  3. Before completing the task:
-     - Combine all cached content with the current state
+- Extraction process for research tasks or searching for information:
+  1. ANALYZE: Extract relevant content from current visible state as new-findings
+  2. EVALUATE: Check if information is sufficient taking into account the new-findings and the cached-findings in memory
+     - If SUFFICIENT → Complete task using all findings
+     - If INSUFFICIENT → Follow these steps in order:
+       a) CACHE: First of all, use cache_content action to store new-findings
+       b) SCROLL: Move EXACTLY ONE PAGE using scroll_down/scroll_up
+       c) REPEAT: Continue analyze-evaluate loop until either:
+          • Information becomes sufficient
+          • Maximum 8 page scrolls completed
+  3. FINALIZE:
+     - Combine all cached-findings with new-findings from current visible state
      - Verify all required information is collected
-     - Present the complete findings in the done action
-- Important extraction guidelines:
-  - Be thorough and specific when extracting information
-  - Always cache findings before scrolling to avoid losing information
-  - Always verify source information before caching
-  - Scroll down EXACTLY ONE PAGE at a time
-  - Stop after maximum 5 page scrolls
+     - Present complete findings in done action
+
+- Critical guidelines:
+  • Be thorough and specific in extraction
+  • Always cache findings BEFORE scrolling
+  • Verify source information before caching
+  • Scroll EXACTLY ONE PAGE each time
+  • NEVER scroll more than one page at once, as this will cause loss of information
+  • NEVER scroll less than half a page, as this is inefficient and you will get stuck in a loop
+  • Stop after maximum 8 page scrolls
 
 10. Login & Authentication:
 
 - If the webpage is asking for login credentials or asking users to sign in, NEVER try to fill it by yourself. Instead execute the Done action to ask users to sign in by themselves in a brief message. 
 - Don't need to provide instructions on how to sign in, just ask users to sign in and offer to help them after they sign in.
+
+11. Plan:
+
+- Plan is a json string wrapped by the <plan> tag
+- If a plan is provided, follow the instructions in the next_steps exactly first
+- If no plan is provided, just continue with the task
 `;

+ 4 - 3
chrome-extension/src/background/agent/prompts/validator.ts

@@ -48,12 +48,13 @@ RULES of ANSWERING THE TASK:
 
 SPECIAL CASES:
 1. If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested, do NOT let it pass
-2. Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right
-3. If the webpage is asking for username or password, you should respond with:
+2. If the task requires consolidating information from multiple pages, focus on the last Action Result. The current page is not important for validation but the last Action Result is.
+3. Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right
+4. If the webpage is asking for username or password, you should respond with:
   - is_valid: true
   - reason: describe the reason why it is valid although the task is not completed yet
   - answer: ask the user to sign in by themselves
-4. If the output is correct and the task is completed, you should respond with 
+5. If the output is correct and the task is completed, you should respond with 
   - is_valid: true
   - reason: "Task completed"
   - answer: The final answer to the task

+ 133 - 73
packages/schema-utils/lib/helper.ts

@@ -60,7 +60,27 @@ function processSchemaNode(node: JsonSchemaObject, definitions: Record<string, J
     const definition = definitions[refPath];
     if (definition) {
       // Process the definition to resolve any nested references
-      return processSchemaNode(definition, definitions);
+      const processedDefinition = processSchemaNode(definition, definitions);
+
+      // Create a new object that preserves properties from the original node (except $ref)
+      const result: JsonSchemaObject = {};
+
+      // First copy properties from the original node except $ref
+      for (const [key, value] of Object.entries(node)) {
+        if (key !== '$ref') {
+          result[key] = value;
+        }
+      }
+
+      // Then copy properties from the processed definition
+      // Don't override any existing properties in the original node
+      for (const [key, value] of Object.entries(processedDefinition)) {
+        if (result[key] === undefined) {
+          result[key] = value;
+        }
+      }
+
+      return result;
     }
   }
 
@@ -74,7 +94,24 @@ function processSchemaNode(node: JsonSchemaObject, definitions: Record<string, J
     const hasNullType = processedAnyOf.some(item => item.type === 'null');
 
     if (nonNullTypes.length === 1 && hasNullType) {
-      const result = { ...nonNullTypes[0] };
+      // Create a result that preserves all properties from the original node
+      const result: JsonSchemaObject = {};
+
+      // Copy all properties from original node except anyOf
+      for (const [key, value] of Object.entries(node)) {
+        if (key !== 'anyOf') {
+          result[key] = value;
+        }
+      }
+
+      // Merge in properties from the non-null type
+      for (const [key, value] of Object.entries(nonNullTypes[0])) {
+        // Don't override properties that were in the original node
+        if (result[key] === undefined) {
+          result[key] = value;
+        }
+      }
+
       result.nullable = true;
       return result;
     }
@@ -115,93 +152,95 @@ function processSchemaNode(node: JsonSchemaObject, definitions: Record<string, J
  * Converts an OpenAI format JSON schema to a Google Gemini compatible schema
  *
  * Key differences handled:
- * 1. OpenAI uses $defs and $ref for references, Gemini uses inline definitions
+ * 1. OpenAI accepts $defs and $ref for references, Gemini only accepts inline definitions
  * 2. Different structure for nullable properties
  * 3. Gemini has a flatter structure for defining properties
+ * 4. https://ai.google.dev/api/caching#Schema
+ * 5. https://ai.google.dev/gemini-api/docs/structured-output?lang=node#json-schemas
  *
  * @param openaiSchema The OpenAI format JSON schema to convert
+ * @param ensureOrder If true, adds the propertyOrdering field for consistent ordering
  * @returns A Google Gemini compatible JSON schema
  */
-export function convertOpenAISchemaToGemini(openaiSchema: JsonSchemaObject): JsonSchemaObject {
+export function convertOpenAISchemaToGemini(openaiSchema: JsonSchemaObject, ensureOrder = false): JsonSchemaObject {
+  // First flatten the schema with dereferenceJsonSchema
+  const flattenedSchema = dereferenceJsonSchema(openaiSchema);
+
   // Create a new schema object
   const geminiSchema: JsonSchemaObject = {
-    type: openaiSchema.type,
+    type: flattenedSchema.type,
     properties: {},
-    required: openaiSchema.required || [],
+    required: flattenedSchema.required || [],
   };
 
-  // Process definitions to use for resolving references
-  const definitions = openaiSchema.$defs || {};
-
   // Process properties
-  if (openaiSchema.properties) {
-    geminiSchema.properties = processProperties(openaiSchema.properties, definitions);
+  if (flattenedSchema.properties) {
+    geminiSchema.properties = processPropertiesForGemini(flattenedSchema.properties, ensureOrder);
+
+    // Add propertyOrdering for top-level properties if ensureOrder is true
+    if (ensureOrder && geminiSchema.properties) {
+      geminiSchema.propertyOrdering = Object.keys(flattenedSchema.properties);
+    }
+  }
+
+  // Copy other Gemini-compatible fields
+  if (flattenedSchema.description) {
+    geminiSchema.description = flattenedSchema.description;
+  }
+
+  if (flattenedSchema.format) {
+    geminiSchema.format = flattenedSchema.format;
+  }
+
+  if (flattenedSchema.enum) {
+    geminiSchema.enum = flattenedSchema.enum;
+  }
+
+  if (flattenedSchema.nullable) {
+    geminiSchema.nullable = flattenedSchema.nullable;
+  }
+
+  // Handle array items
+  if (flattenedSchema.type === 'array' && flattenedSchema.items) {
+    geminiSchema.items = processPropertyForGemini(flattenedSchema.items);
+
+    if (flattenedSchema.minItems !== undefined) {
+      geminiSchema.minItems = flattenedSchema.minItems;
+    }
+
+    if (flattenedSchema.maxItems !== undefined) {
+      geminiSchema.maxItems = flattenedSchema.maxItems;
+    }
   }
 
   return geminiSchema;
 }
 
 /**
- * Process properties recursively, resolving references and converting to Gemini format
+ * Process properties recursively, converting to Gemini format
  */
-function processProperties(
+function processPropertiesForGemini(
   properties: Record<string, JsonSchemaObject>,
-  definitions: Record<string, JsonSchemaObject>,
+  addPropertyOrdering: boolean = false,
 ): Record<string, JsonSchemaObject> {
   const result: Record<string, JsonSchemaObject> = {};
 
   for (const [key, value] of Object.entries(properties)) {
     if (typeof value !== 'object' || value === null) continue;
 
-    result[key] = processProperty(value, definitions);
+    result[key] = processPropertyForGemini(value, addPropertyOrdering);
   }
 
   return result;
 }
 
 /**
- * Process a single property, resolving references and converting to Gemini format
+ * Process a single property, converting to Gemini format
+ *
+ * @param property The property to process
+ * @param addPropertyOrdering Whether to add property ordering for object properties
  */
-function processProperty(property: JsonSchemaObject, definitions: Record<string, JsonSchemaObject>): JsonSchemaObject {
-  // If it's a reference, resolve it
-  if (property.$ref) {
-    const refPath = property.$ref.replace('#/$defs/', '');
-    const definition = definitions[refPath];
-    if (definition) {
-      return processProperty(definition, definitions);
-    }
-  }
-
-  // Handle anyOf for nullable properties
-  if (property.anyOf) {
-    const nonNullType = property.anyOf.find(item => item.type !== 'null' && !item.$ref);
-
-    const refType = property.anyOf.find(item => item.$ref);
-
-    const isNullable = property.anyOf.some(item => item.type === 'null');
-
-    if (refType?.$ref) {
-      const refPath = refType.$ref.replace('#/$defs/', '');
-      const definition = definitions[refPath];
-
-      if (definition) {
-        const processed = processProperty(definition, definitions);
-        if (isNullable) {
-          processed.nullable = true;
-        }
-        return processed;
-      }
-    }
-
-    if (nonNullType) {
-      const processed = processProperty(nonNullType, definitions);
-      if (isNullable) {
-        processed.nullable = true;
-      }
-      return processed;
-    }
-  }
-
+function processPropertyForGemini(property: JsonSchemaObject, addPropertyOrdering = false): JsonSchemaObject {
   // Create a new property object
   const result: JsonSchemaObject = {
     type: property.type,
@@ -212,30 +251,51 @@ function processProperty(property: JsonSchemaObject, definitions: Record<string,
     result.description = property.description;
   }
 
-  // Process nested properties
-  if (property.properties) {
-    result.properties = processProperties(property.properties, definitions);
+  // Copy format if it exists
+  if (property.format) {
+    result.format = property.format;
+  }
+
+  // Copy enum if it exists
+  if (property.enum) {
+    result.enum = property.enum;
+  }
+
+  // Copy nullable if it exists
+  if (property.nullable) {
+    result.nullable = property.nullable;
+  }
+
+  // Process nested properties for objects
+  if (property.type === 'object' && property.properties) {
+    result.properties = processPropertiesForGemini(property.properties, addPropertyOrdering);
 
     // Copy required fields
     if (property.required) {
       result.required = property.required;
-    } else {
-      result.required = [];
+    }
+
+    // Add propertyOrdering for nested object if needed
+    if (addPropertyOrdering && property.properties) {
+      result.propertyOrdering = Object.keys(property.properties);
+    }
+    // Copy propertyOrdering if it already exists
+    else if (property.propertyOrdering) {
+      result.propertyOrdering = property.propertyOrdering;
     }
   }
 
   // Handle arrays
-  if (property.items) {
-    result.items = processProperty(property.items, definitions);
-  }
+  if (property.type === 'array' && property.items) {
+    result.items = processPropertyForGemini(property.items, addPropertyOrdering);
 
-  // Handle special case for NoParamsAction which is an object in OpenAI but a string in Gemini
-  if (property.additionalProperties === true && property.title === 'NoParamsAction' && property.description) {
-    return {
-      type: 'string',
-      nullable: true,
-      description: property.description,
-    };
+    if (property.minItems !== undefined) {
+      result.minItems = property.minItems;
+    }
+
+    if (property.maxItems !== undefined) {
+      result.maxItems = property.maxItems;
+    }
   }
 
   return result;
@@ -251,7 +311,7 @@ export function stringifyCustom(value: JSONSchemaType, indent = '', baseIndent =
   switch (typeof value) {
     case 'string':
       // Escape single quotes within the string if necessary
-      return `'${value.replace(/'/g, "\\\\'")}'`;
+      return `'${(value as string).replace(/'/g, "\\\\'")}'`;
     case 'number':
     case 'boolean':
       return String(value);
@@ -270,7 +330,7 @@ export function stringifyCustom(value: JSONSchemaType, indent = '', baseIndent =
       const properties = keys.map(key => {
         // Assume keys are valid JS identifiers and don't need quotes
         const formattedKey = key;
-        const formattedValue = stringifyCustom(value[key], currentIndent, baseIndent);
+        const formattedValue = stringifyCustom(value[key] as JSONSchemaType, currentIndent, baseIndent);
         return `${currentIndent}${formattedKey}: ${formattedValue}`;
       });
       return `{\n${properties.join(',\n')}\n${indent}}`;

+ 1 - 1
packages/storage/lib/settings/llmProviders.ts

@@ -117,7 +117,7 @@ export function getDefaultProviderConfig(providerId: string): ProviderConfig {
         apiKey: 'ollama', // Set default API key for Ollama
         name: getDefaultDisplayNameFromProviderId(ProviderTypeEnum.Ollama),
         type: ProviderTypeEnum.Ollama,
-        modelNames: [], // Ollama uses modelNames (user adds them)
+        modelNames: llmProviderModelNames[providerId],
         baseUrl: 'http://localhost:11434',
         createdAt: Date.now(),
       };

+ 48 - 51
packages/storage/lib/settings/types.ts

@@ -22,28 +22,25 @@ export enum ProviderTypeEnum {
 
 // Default supported models for each built-in provider
 export const llmProviderModelNames = {
-  [ProviderTypeEnum.OpenAI]: ['gpt-4o', 'gpt-4o-mini', 'o1', 'o3-mini'],
+  [ProviderTypeEnum.OpenAI]: ['gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4o', 'gpt-4o-mini', 'o4-mini', 'o3'],
   [ProviderTypeEnum.Anthropic]: ['claude-3-7-sonnet-latest', 'claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest'],
   [ProviderTypeEnum.DeepSeek]: ['deepseek-chat', 'deepseek-reasoner'],
   [ProviderTypeEnum.Gemini]: [
-    'gemini-2.0-flash',
-    'gemini-2.0-flash-lite',
+    'gemini-2.5-flash-preview-04-17',
     'gemini-2.5-pro-exp-03-25',
+    'gemini-2.5-pro-preview-03-25',
+    'gemini-2.0-flash',
     // 'gemini-2.0-flash-thinking-exp-01-21', // TODO: not support function calling for now
   ],
-  [ProviderTypeEnum.Grok]: ['grok-2', 'grok-2-vision'],
-  [ProviderTypeEnum.Ollama]: [],
-  [ProviderTypeEnum.AzureOpenAI]: ['gpt-4o', 'gpt-4o-mini', 'o3-mini', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano'],
+  [ProviderTypeEnum.Grok]: ['grok-3', 'grok-3-fast', 'grok-3-mini', 'grok-3-mini-fast'],
+  [ProviderTypeEnum.Ollama]: ['qwen3:14b', 'falcon3:10b', 'qwen2.5-coder:14b', 'mistral-small:24b'],
+  [ProviderTypeEnum.AzureOpenAI]: ['gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano', 'gpt-4o', 'gpt-4o-mini', 'o4-mini', 'o3'],
   [ProviderTypeEnum.OpenRouter]: [
-    'openai/gpt-4o-2024-11-20',
     'openai/gpt-4.1',
     'openai/gpt-4.1-mini',
-    'openai/gpt-4.1-nano',
     'openai/o4-mini',
-    'anthropic/claude-3.5-sonnet',
-    'anthropic/claude-3.7-sonnet',
-    'google/gemini-2.0-flash-001',
-    'deepseek/deepseek-chat-v3-0324:free',
+    'openai/gpt-4o-2024-11-20',
+    'google/gemini-2.5-flash-preview',
   ],
   // Custom OpenAI providers don't have predefined models as they are user-defined
 };
@@ -52,44 +49,44 @@ export const llmProviderModelNames = {
 export const llmProviderParameters = {
   [ProviderTypeEnum.OpenAI]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0.01,
-      topP: 0.001,
+      temperature: 0.7,
+      topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.3,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.1,
+      topP: 0.8,
     },
   },
   [ProviderTypeEnum.Anthropic]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0.1,
-      topP: 0.1,
+      temperature: 0.3,
+      topP: 0.6,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0.1,
-      topP: 0.1,
+      temperature: 0.2,
+      topP: 0.5,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0.05,
-      topP: 0.1,
+      temperature: 0.1,
+      topP: 0.5,
     },
   },
   [ProviderTypeEnum.Gemini]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0.01,
-      topP: 0.1,
+      temperature: 0.7,
+      topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0.01,
-      topP: 0.1,
+      temperature: 0.3,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
       temperature: 0.1,
-      topP: 0.1,
+      topP: 0.8,
     },
   },
   [ProviderTypeEnum.Grok]: {
@@ -98,54 +95,54 @@ export const llmProviderParameters = {
       topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0.7,
-      topP: 0.9,
+      temperature: 0.3,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0.7,
-      topP: 0.9,
+      temperature: 0.1,
+      topP: 0.8,
     },
   },
   [ProviderTypeEnum.Ollama]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.3,
+      topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0.01,
-      topP: 0.001,
+      temperature: 0.1,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.1,
+      topP: 0.8,
     },
   },
   [ProviderTypeEnum.AzureOpenAI]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0.01,
-      topP: 0.001,
+      temperature: 0.7,
+      topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.3,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.1,
+      topP: 0.8,
     },
   },
   [ProviderTypeEnum.OpenRouter]: {
     [AgentNameEnum.Planner]: {
-      temperature: 0.01,
-      topP: 0.001,
+      temperature: 0.7,
+      topP: 0.9,
     },
     [AgentNameEnum.Navigator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.3,
+      topP: 0.85,
     },
     [AgentNameEnum.Validator]: {
-      temperature: 0,
-      topP: 0.001,
+      temperature: 0.1,
+      topP: 0.8,
     },
   },
 };

+ 6 - 6
pages/options/src/components/ModelSettings.tsx

@@ -20,12 +20,13 @@ import {
   getDefaultAgentModelParams,
   type ProviderConfig,
 } from '@extension/storage';
-// Import chrome for messaging
-const IS_CHROME = typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined';
 
 // Helper function to check if a model is an O-series model
 function isOpenAIOModel(modelName: string): boolean {
-  return modelName.startsWith('openai/o') || modelName.startsWith('o');
+  if (modelName.startsWith('openai/')) {
+    return modelName.startsWith('openai/o');
+  }
+  return modelName.startsWith('o');
 }
 
 interface ModelSettingsProps {
@@ -425,7 +426,7 @@ export const ModelSettings = ({ isDarkMode = false }: ModelSettingsProps) => {
 
       if (providers[provider].type === ProviderTypeEnum.AzureOpenAI) {
         // Ensure modelNames is NOT included for Azure
-        delete configToSave.modelNames;
+        configToSave.modelNames = undefined;
       } else {
         // Ensure modelNames IS included for non-Azure
         // Use existing modelNames from state, or default if somehow missing
@@ -537,7 +538,6 @@ export const ModelSettings = ({ isDarkMode = false }: ModelSettingsProps) => {
         // For Azure, verify the model is in the deployment names list
         if (providerConfig && providerConfig.type === ProviderTypeEnum.AzureOpenAI) {
           console.log(`[handleModelChange] Azure model selected: ${model}`);
-          console.log(`[handleModelChange] Available deployments:`, providerConfig.azureDeploymentNames || []);
         }
 
         // Reset reasoning effort if switching models
@@ -964,7 +964,7 @@ export const ModelSettings = ({ isDarkMode = false }: ModelSettingsProps) => {
   const addAzureProvider = () => {
     // Count existing Azure providers
     const azureProviders = Object.keys(providers).filter(
-      key => key === ProviderTypeEnum.AzureOpenAI || key.startsWith(ProviderTypeEnum.AzureOpenAI + '_'),
+      key => key === ProviderTypeEnum.AzureOpenAI || key.startsWith(`${ProviderTypeEnum.AzureOpenAI}_`),
     );
     const nextNumber = azureProviders.length + 1;