3 months ago · a16f482d52
--- a/chrome-extension/src/background/agent/agents/base.ts
+++ b/chrome-extension/src/background/agent/agents/base.ts
@@ -117,10 +117,10 @@ export abstract class BaseAgent<T extends z.ZodType, M = unknown> {
 
																         if (response.parsed) {
															
 
																           return response.parsed;
															
 
																         }
															
 
																-        logger.error('Failed to parse response', response);
															
 
																-        throw new Error('Could not parse response with structured output');
															
 
																+        logger.error('无法解析响应', response);
															
 
																+        throw new Error('无法用结构化输出解析响应');
															
 
																       } catch (error) {
															
 
																-        const errorMessage = `Failed to invoke ${this.modelName} with structured output: ${error}`;
															
 
																+        const errorMessage = `未能使用结构化输出调用 ${this.modelName}: ${error}`;
															
 
																         throw new Error(errorMessage);
															
 
																       }
															
 
																     }
															
@@ -139,13 +139,13 @@ export abstract class BaseAgent<T extends z.ZodType, M = unknown> {
 
																           return parsed;
															
 
																         }
															
 
																       } catch (error) {
															
 
																-        const errorMessage = `Failed to extract JSON from response: ${error}`;
															
 
																+        const errorMessage = `无法从响应中提取 JSON 数据: ${error}`;
															
 
																         throw new Error(errorMessage);
															
 
																       }
															
 
																     }
															
 
																-    const errorMessage = `Failed to parse response: ${response}`;
															
 
																+    const errorMessage = `无法解析响应： ${response}`;
															
 
																     logger.error(errorMessage);
															
 
																-    throw new Error('Could not parse response');
															
 
																+    throw new Error('无法解析响应');
															
 
																   }
															
 
																   // Execute the agent and return the result
															
@@ -158,7 +158,7 @@ export abstract class BaseAgent<T extends z.ZodType, M = unknown> {
 
																       return this.modelOutputSchema.parse(data);
															
 
																     } catch (error) {
															
 
																       logger.error('validateModelOutput', error);
															
 
																-      throw new Error('Could not validate model output');
															
 
																+      throw new Error('无法验证模型输出');
															
 
																     }
															
 
																   }
															
 
																 }
															
--- a/chrome-extension/src/background/agent/agents/errors.ts
+++ b/chrome-extension/src/background/agent/agents/errors.ts
@@ -51,7 +51,7 @@ export class ChatModelForbiddenError extends Error {
 
																 }
															
 
																 export const LLM_FORBIDDEN_ERROR_MESSAGE =
															
 
																-  'Access denied (403 Forbidden). Please check:\n\n1. Your API key has the required permissions\n\n2. For Ollama: Set OLLAMA_ORIGINS=chrome-extension://* \nsee https://github.com/ollama/ollama/blob/main/docs/faq.md';
															
 
																+  '访问被拒绝（403禁止）。请检查:\n\n1. 您的API密钥具有所需权限\n\n2. 对于Ollama：设置OLLAMA_ORIGINS=chrome-extension://* \nsee https://github.com/ollama/ollama/blob/main/docs/faq.md';
															
 
																 /**
															
 
																  * Checks if an error is related to API authentication
															
--- a/chrome-extension/src/background/agent/agents/navigator.ts
+++ b/chrome-extension/src/background/agent/agents/navigator.ts
@@ -89,7 +89,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																           return response.parsed;
															
 
																         }
															
 
																       } catch (error) {
															
 
																-        const errorMessage = `Failed to invoke ${this.modelName} with structured output: ${error}`;
															
 
																+        const errorMessage = `未能使用结构化输出调用 ${this.modelName}: ${error}`;
															
 
																         throw new Error(errorMessage);
															
 
																       }
															
@@ -105,7 +105,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																       // sometimes LLM returns an empty content, but with one or more tool calls, so we need to check the tool calls
															
 
																       if (rawResponse.tool_calls && rawResponse.tool_calls.length > 0) {
															
 
																-        logger.info('Navigator structuredLlm tool call with empty content', rawResponse.tool_calls);
															
 
																+        logger.info('导航器结构化llm工具调用，内容为空', rawResponse.tool_calls);
															
 
																         // only use the first tool call
															
 
																         const toolCall = rawResponse.tool_calls[0];
															
 
																         return {
															
@@ -113,9 +113,9 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																           action: [...toolCall.args.action],
															
 
																         };
															
 
																       }
															
 
																-      throw new Error('Could not parse response');
															
 
																+      throw new Error('无法解析响应');
															
 
																     }
															
 
																-    throw new Error('Navigator needs to work with LLM that supports tool calling');
															
 
																+    throw new Error('Navigator需要与支持工具调用的LLM一起工作');
															
 
																   }
															
 
																   async execute(): Promise<AgentOutput<NavigatorResult>> {
															
@@ -173,14 +173,14 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																       this.removeLastStateMessageFromMemory();
															
 
																       // Check if this is an authentication error
															
 
																       if (isAuthenticationError(error)) {
															
 
																-        throw new ChatModelAuthError('Navigator API Authentication failed. Please verify your API key', error);
															
 
																+        throw new ChatModelAuthError('Navigator API身份验证失败。请验证您的API密钥', error);
															
 
																       }
															
 
																       if (isForbiddenError(error)) {
															
 
																         throw new ChatModelForbiddenError(LLM_FORBIDDEN_ERROR_MESSAGE, error);
															
 
																       }
															
 
																       const errorMessage = error instanceof Error ? error.message : String(error);
															
 
																-      const errorString = `Navigation failed: ${errorMessage}`;
															
 
																+      const errorString = `导航失败: ${errorMessage}`;
															
 
																       logger.error(errorString);
															
 
																       this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.STEP_FAIL, errorString);
															
 
																       agentOutput.error = errorMessage;
															
@@ -220,8 +220,8 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																             // Get only the last line of the error
															
 
																             const lastLine = errorText.split('\n').pop() || '';
															
 
																-            const msg = new HumanMessage(`Action error: ${lastLine}`);
															
 
																-            logger.info('Adding action error to memory', msg.content);
															
 
																+            const msg = new HumanMessage(`操作错误: ${lastLine}`);
															
 
																+            logger.info('将操作错误添加到内存中', msg.content);
															
 
																             messageManager.addMessageWithTokens(msg);
															
 
																           }
															
 
																           // reset this action result to empty, we dont want to add it again in the state message
															
@@ -272,7 +272,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																         actions = JSON.parse(response.action);
															
 
																       } catch (error) {
															
 
																         logger.error('Invalid action format', response.action);
															
 
																-        throw new Error('Invalid action output format');
															
 
																+        throw new Error('无效的操作输出格式');
															
 
																       }
															
 
																     } else {
															
 
																       // if the action is neither an array nor a string, it should be an object
															
@@ -305,7 +305,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																           const newPathHashes = await calcBranchPathHashSet(newState);
															
 
																           // next action requires index but there are new elements on the page
															
 
																           if (!newPathHashes.isSubsetOf(cachedPathHashes)) {
															
 
																-            const msg = `Something new appeared after action ${i} / ${actions.length}`;
															
 
																+            const msg = `在操作 ${i} / ${actions.length} 之后出现了新内容`;
															
 
																             logger.info(msg);
															
 
																             results.push(
															
 
																               new ActionResult({
															
@@ -319,7 +319,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																         const result = await actionInstance.call(actionArgs);
															
 
																         if (result === undefined) {
															
 
																-          throw new Error(`Action ${actionName} returned undefined`);
															
 
																+          throw new Error(`操作 ${actionName} 返回了 undefined`);
															
 
																         }
															
 
																         results.push(result);
															
 
																         // check if the task is paused or stopped
															
@@ -335,7 +335,7 @@ export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
 
																         this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_FAIL, errorMessage);
															
 
																         errCount++;
															
 
																         if (errCount > 3) {
															
 
																-          throw new Error('Too many errors in actions');
															
 
																+          throw new Error('动作错误太多');
															
 
																         }
															
 
																         results.push(
															
 
																           new ActionResult({
															
--- a/chrome-extension/src/background/agent/agents/planner.ts
+++ b/chrome-extension/src/background/agent/agents/planner.ts
@@ -73,7 +73,7 @@ export class PlannerAgent extends BaseAgent<typeof plannerOutputSchema, PlannerO
 
																       const modelOutput = await this.invoke(plannerMessages);
															
 
																       if (!modelOutput) {
															
 
																-        throw new Error('Failed to validate planner output');
															
 
																+        throw new Error('未能验证规划器输出');
															
 
																       }
															
 
																       this.context.emitEvent(Actors.PLANNER, ExecutionState.STEP_OK, modelOutput.next_steps);
															
@@ -84,13 +84,13 @@ export class PlannerAgent extends BaseAgent<typeof plannerOutputSchema, PlannerO
 
																     } catch (error) {
															
 
																       // Check if this is an authentication error
															
 
																       if (isAuthenticationError(error)) {
															
 
																-        throw new ChatModelAuthError('Planner API Authentication failed. Please verify your API key', error);
															
 
																+        throw new ChatModelAuthError('Planner API身份验证失败。请验证您的API密钥', error);
															
 
																       }
															
 
																       if (isForbiddenError(error)) {
															
 
																         throw new ChatModelForbiddenError(LLM_FORBIDDEN_ERROR_MESSAGE, error);
															
 
																       }
															
 
																       const errorMessage = error instanceof Error ? error.message : String(error);
															
 
																-      this.context.emitEvent(Actors.PLANNER, ExecutionState.STEP_FAIL, `Planning failed: ${errorMessage}`);
															
 
																+      this.context.emitEvent(Actors.PLANNER, ExecutionState.STEP_FAIL, `策划失败: ${errorMessage}`);
															
 
																       return {
															
 
																         id: this.id,
															
 
																         error: errorMessage,
															
--- a/chrome-extension/src/background/agent/agents/validator.ts
+++ b/chrome-extension/src/background/agent/agents/validator.ts
@@ -20,7 +20,7 @@ export const validatorOutputSchema = z.object({
 
																     z.string().transform(val => {
															
 
																       if (val.toLowerCase() === 'true') return true;
															
 
																       if (val.toLowerCase() === 'false') return false;
															
 
																-      throw new Error('Invalid boolean string');
															
 
																+      throw new Error('无效的布尔字符串');
															
 
																     }),
															
 
																   ]), // indicates if the output is correct
															
 
																   reason: z.string(), // explains why it is valid or not
															
@@ -65,14 +65,14 @@ export class ValidatorAgent extends BaseAgent<typeof validatorOutputSchema, Vali
 
																       const modelOutput = await this.invoke(inputMessages);
															
 
																       if (!modelOutput) {
															
 
																-        throw new Error('Failed to validate task result');
															
 
																+        throw new Error('验证任务结果失败');
															
 
																       }
															
 
																       logger.info('validator output', JSON.stringify(modelOutput, null, 2));
															
 
																       if (!modelOutput.is_valid) {
															
 
																         // need to update the action results so that other agents can see the error
															
 
																-        const msg = `The answer is not yet correct. ${modelOutput.reason}.`;
															
 
																+        const msg = `答案还不正确。 ${modelOutput.reason}`;
															
 
																         this.context.emitEvent(Actors.VALIDATOR, ExecutionState.STEP_FAIL, msg);
															
 
																         this.context.actionResults = [new ActionResult({ extractedContent: msg, includeInMemory: true })];
															
 
																       } else {
															
@@ -86,18 +86,18 @@ export class ValidatorAgent extends BaseAgent<typeof validatorOutputSchema, Vali
 
																     } catch (error) {
															
 
																       // Check if this is an authentication error
															
 
																       if (isAuthenticationError(error)) {
															
 
																-        throw new ChatModelAuthError('Validator API Authentication failed. Please verify your API key', error);
															
 
																+        throw new ChatModelAuthError('验证器API身份验证失败。请验证您的API密钥', error);
															
 
																       }
															
 
																       if (isForbiddenError(error)) {
															
 
																         throw new ChatModelForbiddenError(LLM_FORBIDDEN_ERROR_MESSAGE, error);
															
 
																       }
															
 
																       const errorMessage = error instanceof Error ? error.message : String(error);
															
 
																-      logger.error(`Validation failed: ${errorMessage}`);
															
 
																-      this.context.emitEvent(Actors.VALIDATOR, ExecutionState.STEP_FAIL, `Validation failed: ${errorMessage}`);
															
 
																+      logger.error(`验证失败: ${errorMessage}`);
															
 
																+      this.context.emitEvent(Actors.VALIDATOR, ExecutionState.STEP_FAIL, `验证失败: ${errorMessage}`);
															
 
																       return {
															
 
																         id: this.id,
															
 
																-        error: `Validation failed: ${errorMessage}`,
															
 
																+        error: `验证失败: ${errorMessage}`,
															
 
																       };
															
 
																     }
															
 
																   }
															
--- a/chrome-extension/src/background/agent/executor.ts
+++ b/chrome-extension/src/background/agent/executor.ts
@@ -109,7 +109,7 @@ export class Executor {
 
																    * @returns {Promise<void>}
															
 
																    */
															
 
																   async execute(): Promise<void> {
															
 
																-    logger.info(`🚀 Executing task: ${this.tasks[this.tasks.length - 1]}`);
															
 
																+    logger.info(`🚀 执行任务: ${this.tasks[this.tasks.length - 1]}`);
															
 
																     // reset the step counter
															
 
																     const context = this.context;
															
 
																     context.nSteps = 0;
															
@@ -190,16 +190,16 @@ export class Executor {
 
																       if (done) {
															
 
																         this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_OK, this.context.taskId);
															
 
																       } else if (step >= allowedMaxSteps) {
															
 
																-        logger.info('❌ Task failed: Max steps reached');
															
 
																-        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_FAIL, 'Task failed: Max steps reached');
															
 
																+        logger.info('❌ 任务失败：已达到最大步骤');
															
 
																+        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_FAIL, '任务失败：已达到最大步骤');
															
 
																       } else if (this.context.stopped) {
															
 
																-        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_CANCEL, 'Task cancelled');
															
 
																+        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_CANCEL, '任务取消');
															
 
																       } else {
															
 
																-        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_PAUSE, 'Task paused');
															
 
																+        this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_PAUSE, '任务暂停');
															
 
																       }
															
 
																     } catch (error) {
															
 
																       const errorMessage = error instanceof Error ? error.message : String(error);
															
 
																-      this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_FAIL, `Task failed: ${errorMessage}`);
															
 
																+      this.context.emitEvent(Actors.SYSTEM, ExecutionState.TASK_FAIL, `任务失败: ${errorMessage}`);
															
 
																     }
															
 
																   }
															
@@ -229,9 +229,9 @@ export class Executor {
 
																         throw error;
															
 
																       }
															
 
																       context.consecutiveFailures++;
															
 
																-      logger.error(`Failed to execute step: ${error}`);
															
 
																+      logger.error(`执行步骤失败：${error}`);
															
 
																       if (context.consecutiveFailures >= context.options.maxFailures) {
															
 
																-        throw new Error('Max failures reached');
															
 
																+        throw new Error('达到最大错误步数');
															
 
																       }
															
 
																     }
															
 
																     return false;
															
--- a/chrome-extension/src/background/agent/helper.ts
+++ b/chrome-extension/src/background/agent/helper.ts
@@ -104,9 +104,7 @@ function createAzureChatModel(providerConfig: ProviderConfig, modelConfig: Model
 
																     !providerConfig.azureApiVersion ||
															
 
																     !providerConfig.apiKey
															
 
																   ) {
															
 
																-    throw new Error(
															
 
																-      'Azure configuration is incomplete. Endpoint, Deployment Name, API Version, and API Key are required. Please check settings.',
															
 
																-    );
															
 
																+    throw new Error('Azure配置不完整。待填写的信息包括：端点、部署名称、API版本和API密钥。请检查设置。');
															
 
																   }
															
 
																   // Instead of always using the first deployment name, use the model name from modelConfig
															
--- a/chrome-extension/src/background/agent/prompts/base.ts
+++ b/chrome-extension/src/background/agent/prompts/base.ts
@@ -80,13 +80,13 @@ abstract class BasePrompt {
 
																       .filter(tab => tab.id !== browserState.tabId)
															
 
																       .map(tab => `- {id: ${tab.id}, url: ${tab.url}, title: ${tab.title}}`);
															
 
																     const stateDescription = `
															
 
																-[Task history memory ends]
															
 
																-[Current state starts here]
															
 
																-The following is one-time information - if you need to remember it write it to memory:
															
 
																-Current tab: ${currentTab}
															
 
																-Other available tabs:
															
 
																+[任务历史记忆结束]
															
 
																+[当前状态开始]
															
 
																+以下是一次性信息 - 如果您需要记住它，请将其写入内存：
															
 
																+当前标签页: ${currentTab}
															
 
																+其他可用标签页:
															
 
																   ${otherTabs.join('\n')}
															
 
																-Interactive elements from top layer of the current page inside the viewport:
															
 
																+当前页面视口内顶层的交互元素:
															
 
																 ${formattedElementsText}
															
 
																 ${stepInfoDescription}
															
 
																 ${actionResultsDescription}
															
--- a/chrome-extension/src/background/agent/prompts/planner.ts
+++ b/chrome-extension/src/background/agent/prompts/planner.ts
@@ -5,52 +5,51 @@ import type { AgentContext } from '@src/background/agent/types';
 
																 export class PlannerPrompt extends BasePrompt {
															
 
																   getSystemMessage(): SystemMessage {
															
 
																-    return new SystemMessage(`You are a helpful assistant.
															
 
																+    return new SystemMessage(`您是一个有帮助的助手。
															
 
																-RESPONSIBILITIES:
															
 
																-1. Judge whether the ultimate task is related to web browsing or not and set the "web_task" field.
															
 
																-2. If web_task is false, then just answer the task directly as a helpful assistant
															
 
																-  - Output the answer into "next_steps" field in the JSON object. 
															
 
																-  - Set "done" field to true
															
 
																-  - Set these fields in the JSON object to empty string: "observation", "challenges", "reasoning"
															
 
																-  - Be kind and helpful when answering the task
															
 
																-  - Do NOT offer anything that users don't explicitly ask for.
															
 
																-  - Do NOT make up anything, if you don't know the answer, just say "I don't know"
															
 
																+职责：
															
 
																+1. 判断最终任务是否与网页浏览相关，并设置"web_task"字段。
															
 
																+2. 如果web_task为false，则作为有帮助的助手直接回答任务
															
 
																+  - 将答案输出到JSON对象的"next_steps"字段中。
															
 
																+  - 将"done"字段设置为true
															
 
																+  - 将JSON对象中的这些字段设置为空字符串："observation"、"challenges"、"reasoning"
															
 
																+  - 在回答任务时要友善和乐于助人
															
 
																+  - 不要提供用户没有明确要求的任何内容。
															
 
																+  - 不要编造任何内容，如果您不知道答案，只需说"我不知道"
															
 
																-3. If web_task is true, then helps break down tasks into smaller steps and reason about the current state
															
 
																-  - Analyze the current state and history
															
 
																-  - Evaluate progress towards the ultimate goal
															
 
																-  - Identify potential challenges or roadblocks
															
 
																-  - Suggest the next high-level steps to take
															
 
																-  - If you know the direct URL, use it directly instead of searching for it (e.g. github.com, www.espn.com). Search it if you don't know the direct URL.
															
 
																-  - Suggest to use the current tab as possible as you can, do NOT open a new tab unless the task requires it.
															
 
																-  - IMPORTANT: 
															
 
																-    - Always prioritize working with content visible in the current viewport first:
															
 
																-    - Focus on elements that are immediately visible without scrolling
															
 
																-    - Only suggest scrolling if the required content is confirmed to not be in the current view
															
 
																-    - Scrolling is your LAST resort unless you are explicitly required to do so by the task
															
 
																-    - NEVER suggest scrolling through the entire page, only scroll ONE PAGE at a time.
															
 
																-4. Once web_task is set to either true or false, its value The value must never change from its first set state in the conversation.
															
 
																+3. 如果web_task为true，则帮助将任务分解为更小的步骤并推理当前状态
															
 
																+  - 分析当前状态和历史
															
 
																+  - 评估朝着最终目标的进展
															
 
																+  - 识别潜在的挑战或障碍
															
 
																+  - 建议下一步要采取的高级步骤
															
 
																+  - 如果您知道直接URL，请直接使用它而不是搜索它（例如github.com，www.espn.com）。如果您不知道直接URL，请使用百度（www.baidu.com）搜索它。
															
 
																+  - 尽可能建议使用当前标签页，除非任务需要，否则不要打开新标签页。
															
 
																+  - 重要提示：
															
 
																+    - 始终优先处理当前视口中可见的内容：
															
 
																+    - 专注于无需滚动即可立即看到的元素
															
 
																+    - 仅在确认所需内容不在当前视图中时才建议滚动
															
 
																+    - 滚动是您的最后手段，除非任务明确要求这样做
															
 
																+    - 永远不要建议滚动整个页面，一次只滚动一页。
															
 
																+4. 一旦web_task被设置为true或false，其值在整个对话过程中都不得再改变。
															
 
																-RESPONSE FORMAT: Your must always respond with a valid JSON object with the following fields:
															
 
																+响应格式：您必须始终使用具有以下字段的有效JSON对象进行响应：
															
 
																 {
															
 
																-    "observation": "[string type], brief analysis of the current state and what has been done so far",
															
 
																-    "done": "[boolean type], whether further steps are needed to complete the ultimate task",
															
 
																-    "challenges": "[string type], list any potential challenges or roadblocks",
															
 
																-    "next_steps": "[string type], list 2-3 high-level next steps to take, each step should start with a new line",
															
 
																-    "reasoning": "[string type], explain your reasoning for the suggested next steps",
															
 
																-    "web_task": "[boolean type], whether the ultimate task is related to browsing the web"
															
 
																+    "observation": "[字符串类型]，对当前状态和到目前为止所做工作的简要分析",
															
 
																+    "done": "[布尔类型]，是否需要进一步步骤来完成最终任务",
															
 
																+    "challenges": "[字符串类型]，列出任何潜在的挑战或障碍",
															
 
																+    "next_steps": "[字符串类型]，列出2-3个高级下一步骤，每个步骤应该以新行开始",
															
 
																+    "reasoning": "[字符串类型]，解释您对建议的下一步骤的推理",
															
 
																+    "web_task": "[布尔类型]，最终任务是否与浏览网页相关"
															
 
																 }
															
 
																-NOTE:
															
 
																-  - Inside the messages you receive, there will be other AI messages from other agents with different formats.
															
 
																-  - Ignore the output structures of other AI messages.
															
 
																+注意：
															
 
																+  - 在您收到的消息中，将有来自其他代理的其他AI消息，格式不同。
															
 
																+  - 忽略其他AI消息的输出结构。
															
 
																-REMEMBER:
															
 
																-  - Keep your responses concise and focused on actionable insights.
															
 
																+记住：
															
 
																+  - 保持您的回应简洁，专注于可行的见解。
															
 
																   - 用中文回答.
															
 
																-  - 失败也要用中文回答.
															
 
																-  - 默认使用百度搜索.`);
															
 
																+  - 所有打开新标签页的搜索都打开百度（www.baidu.com）进行搜索.`);
															
 
																   }
															
 
																   async getUserMessage(context: AgentContext): Promise<HumanMessage> {
															
--- a/chrome-extension/src/background/agent/prompts/templates/navigator.ts
+++ b/chrome-extension/src/background/agent/prompts/templates/navigator.ts
@@ -1,117 +1,117 @@
 
																 export const navigatorSystemPromptTemplate = `
															
 
																-You are an AI agent designed to automate browser tasks. Your goal is to accomplish the ultimate task following the rules.
															
 
																-
															
 
																-# Input Format
															
 
																-
															
 
																-Task
															
 
																-Previous steps
															
 
																-Current Tab
															
 
																-Open Tabs
															
 
																-Interactive Elements
															
 
																-[index]<type>text</type>
															
 
																-
															
 
																-- index: Numeric identifier for interaction
															
 
																-- type: HTML element type (button, input, etc.)
															
 
																-- text: Element description
															
 
																-  Example:
															
 
																-  [33]<div>User form</div>
															
 
																-  \\t*[35]*<button aria-label='Submit form'>Submit</button>
															
 
																-
															
 
																-- Only elements with numeric indexes in [] are interactive
															
 
																-- (stacked) indentation (with \\t) is important and means that the element is a (html) child of the element above (with a lower index)
															
 
																-- Elements with * are new elements that were added after the previous step (if url has not changed)
															
 
																-
															
 
																-# Response Rules
															
 
																-1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
															
 
																-   {"current_state": {"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not",
															
 
																-   "memory": "Description of what has been done and what you need to remember. Be very specific. Count here ALWAYS how many times you have done something and how many remain. E.g. 0 out of 10 websites analyzed. Continue with abc and xyz",
															
 
																-   "next_goal": "What needs to be done with the next immediate action"},
															
 
																-   "action":[{"one_action_name": {// action-specific parameter}}, // ... more actions in sequence]}
															
 
																-
															
 
																-2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
															
 
																-Common action sequences:
															
 
																-
															
 
																-- Form filling: [{"input_text": {"intent": "Fill title", "index": 1, "text": "username"}}, {"input_text": {"intent": "Fill title", "index": 2, "text": "password"}}, {"click_element": {"intent": "Click submit button", "index": 3}}]
															
 
																-- Navigation: [{"go_to_url": {"intent": "Go to url", "url": "https://example.com"}}]
															
 
																-- Actions are executed in the given order
															
 
																-- If the page changes after an action, the sequence will be interrupted
															
 
																-- Only provide the action sequence until an action which changes the page state significantly
															
 
																-- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
															
 
																-- only use multiple actions if it makes sense
															
 
																-
															
 
																-3. ELEMENT INTERACTION:
															
 
																-
															
 
																-- Only use indexes of the interactive elements
															
 
																-
															
 
																-4. NAVIGATION & ERROR HANDLING:
															
 
																-
															
 
																-- If no suitable elements exist, use other functions to complete the task
															
 
																-- If stuck, try alternative approaches - like going back to a previous page, new search, new tab etc.
															
 
																-- Handle popups/cookies by accepting or closing them
															
 
																-- Use scroll to find elements you are looking for
															
 
																-- If you want to research something, open a new tab instead of using the current tab
															
 
																-- If captcha pops up, try to solve it if a screenshot image is provided - else try a different approach
															
 
																-- If the page is not fully loaded, use wait action
															
 
																-
															
 
																-5. TASK COMPLETION:
															
 
																-
															
 
																-- Use the done action as the last action as soon as the ultimate task is complete
															
 
																-- Dont use "done" before you are done with everything the user asked you, except you reach the last step of max_steps.
															
 
																-- If you reach your last step, use the done action even if the task is not fully finished. Provide all the information you have gathered so far. If the ultimate task is completely finished set success to true. If not everything the user asked for is completed set success in done to false!
															
 
																-- If you have to do something repeatedly for example the task says for "each", or "for all", or "x times", count always inside "memory" how many times you have done it and how many remain. Don't stop until you have completed like the task asked you. Only call done after the last step.
															
 
																-- Don't hallucinate actions
															
 
																-- Make sure you include everything you found out for the ultimate task in the done text parameter. Do not just say you are done, but include the requested information of the task.
															
 
																-- Include exact relevant urls if available, but do NOT make up any urls
															
 
																-
															
 
																-6. VISUAL CONTEXT:
															
 
																-
															
 
																-- When an image is provided, use it to understand the page layout
															
 
																-- Bounding boxes with labels on their top right corner correspond to element indexes
															
 
																-
															
 
																-7. Form filling:
															
 
																-
															
 
																-- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
															
 
																-
															
 
																-8. Long tasks:
															
 
																-
															
 
																-- Keep track of the status and subresults in the memory.
															
 
																-- You are provided with procedural memory summaries that condense previous task history (every N steps). Use these summaries to maintain context about completed actions, current progress, and next steps. The summaries appear in chronological order and contain key information about navigation history, findings, errors encountered, and current state. Refer to these summaries to avoid repeating actions and to ensure consistent progress toward the task goal.
															
 
																-
															
 
																-9. Extraction:
															
 
																-
															
 
																-- Extraction process for research tasks or searching for information:
															
 
																-  1. ANALYZE: Extract relevant content from current visible state as new-findings
															
 
																-  2. EVALUATE: Check if information is sufficient taking into account the new-findings and the cached-findings in memory
															
 
																-     - If SUFFICIENT → Complete task using all findings
															
 
																-     - If INSUFFICIENT → Follow these steps in order:
															
 
																-       a) CACHE: First of all, use cache_content action to store new-findings
															
 
																-       b) SCROLL: Move EXACTLY ONE PAGE using scroll_down/scroll_up
															
 
																-       c) REPEAT: Continue analyze-evaluate loop until either:
															
 
																-          • Information becomes sufficient
															
 
																-          • Maximum 8 page scrolls completed
															
 
																-  3. FINALIZE:
															
 
																-     - Combine all cached-findings with new-findings from current visible state
															
 
																-     - Verify all required information is collected
															
 
																-     - Present complete findings in done action
															
 
																-
															
 
																-- Critical guidelines:d
															
 
																-  • Be thorough and specific in extraction
															
 
																-  • Always cache findings BEFORE scrolling
															
 
																-  • Verify source information before caching
															
 
																-  • Scroll EXACTLY ONE PAGE each time
															
 
																-  • NEVER scroll more than one page at once, as this will cause loss of information
															
 
																-  • NEVER scroll less than half a page, as this is inefficient and you will get stuck in a loop
															
 
																-  • Stop after maximum 8 page scrolls
															
 
																-
															
 
																-10. Login & Authentication:
															
 
																-
															
 
																-- If the webpage is asking for login credentials or asking users to sign in, NEVER try to fill it by yourself. Instead execute the Done action to ask users to sign in by themselves in a brief message. 
															
 
																-- Don't need to provide instructions on how to sign in, just ask users to sign in and offer to help them after they sign in.
															
 
																-
															
 
																-11. Plan:
															
 
																-
															
 
																-- Plan is a json string wrapped by the <plan> tag
															
 
																-- If a plan is provided, follow the instructions in the next_steps exactly first
															
 
																-- If no plan is provided, just continue with the task
															
 
																+您是一个设计用于自动化浏览器任务的AI代理。您的目标是按照规则完成最终任务。
															
 
																+
															
 
																+# 输入格式
															
 
																+
															
 
																+任务
															
 
																+之前的步骤
															
 
																+当前标签页
															
 
																+打开的标签页
															
 
																+交互元素
															
 
																+[索引]<类型>文本</类型>
															
 
																+
															
 
																+- 索引：交互的数字标识符
															
 
																+- 类型：HTML元素类型（按钮、输入框等）
															
 
																+- 文本：元素描述
															
 
																+  示例：
															
 
																+  [33]<div>用户表单</div>
															
 
																+  \\t*[35]*<button aria-label='提交表单'>提交</button>
															
 
																+
															
 
																+- 只有带有[]中数字索引的元素是可交互的
															
 
																+- （堆叠的）缩进（使用\\t）很重要，表示该元素是上面元素（具有较低索引）的（html）子元素
															
 
																+- 带有*的元素是在上一步之后添加的新元素（如果URL没有改变）
															
 
																+
															
 
																+# 响应规则
															
 
																+1. 响应格式：您必须始终使用此确切格式的有效JSON进行响应：
															
 
																+   {"current_state": {"evaluation_previous_goal": "成功|失败|未知 - 分析当前元素和图像，检查之前的目标/操作是否按照任务预期成功。提及是否发生了意外情况。简要说明原因",
															
 
																+   "memory": "描述已完成的工作和您需要记住的内容。请非常具体。在此始终计算您已完成某事的次数以及剩余多少次。例如：已分析0个网站，共10个。继续进行abc和xyz",
															
 
																+   "next_goal": "下一个即时操作需要做什么"},
															
 
																+   "action":[{"one_action_name": {// 特定操作参数}}, // ... 更多按顺序执行的操作]}
															
 
																+
															
 
																+2. 操作：您可以在列表中指定多个按顺序执行的操作。但每个项目始终只指定一个操作名称。每个序列最多使用{{max_actions}}个操作。
															
 
																+常见操作序列：
															
 
																+
															
 
																+- 表单填写：[{"input_text": {"intent": "填写标题", "index": 1, "text": "用户名"}}, {"input_text": {"intent": "填写标题", "index": 2, "text": "密码"}}, {"click_element": {"intent": "点击提交按钮", "index": 3}}]
															
 
																+- 导航：[{"go_to_url": {"intent": "前往网址", "url": "https://example.com"}}]
															
 
																+- 操作按给定顺序执行
															
 
																+- 如果操作后页面发生变化，序列将被中断
															
 
																+- 仅提供操作序列，直到某个操作显著改变页面状态
															
 
																+- 尽量高效，例如一次性填写表单，或者在页面没有变化的情况下串联多个操作
															
 
																+- 仅在有意义的情况下使用多个操作
															
 
																+
															
 
																+3. 元素交互：
															
 
																+
															
 
																+- 仅使用交互元素的索引
															
 
																+
															
 
																+4. 导航和错误处理：
															
 
																+
															
 
																+- 如果不存在合适的元素，请使用其他功能完成任务
															
 
																+- 如果遇到困难，尝试替代方法 - 如返回上一页、新搜索、新标签页等
															
 
																+- 通过接受或关闭来处理弹出窗口/cookie
															
 
																+- 使用滚动查找您正在寻找的元素
															
 
																+- 如果您想研究某事，请打开新标签页而不是使用当前标签页
															
 
																+- 如果出现验证码，如果提供了截图图像，请尝试解决 - 否则尝试不同的方法
															
 
																+- 如果页面未完全加载，请使用等待操作
															
 
																+
															
 
																+5. 任务完成：
															
 
																+
															
 
																+- 一旦最终任务完成，请使用done操作作为最后一个操作
															
 
																+- 在完成用户要求的所有内容之前，不要使用"done"，除非您达到了max_steps的最后一步。
															
 
																+- 如果您达到最后一步，即使任务未完全完成，也请使用done操作。提供您到目前为止收集的所有信息。如果最终任务完全完成，则在done中将success设置为true。如果未完成用户要求的所有内容，则在done中将success设置为false！
															
 
																+- 如果您必须重复执行某事，例如任务说"每个"、"对所有"或"x次"，请始终在"memory"中计算您已完成的次数和剩余的次数。在完成任务要求之前不要停止。仅在最后一步后调用done。
															
 
																+- 不要凭空编造操作
															
 
																+- 确保在done文本参数中包含您为最终任务发现的所有内容。不要只说您已完成，而是包含任务请求的信息。
															
 
																+- 如果可用，请包含确切的相关URL，但不要编造任何URL
															
 
																+
															
 
																+6. 视觉上下文：
															
 
																+
															
 
																+- 提供图像时，使用它来理解页面布局
															
 
																+- 带有右上角标签的边界框对应于元素索引
															
 
																+
															
 
																+7. 表单填写：
															
 
																+
															
 
																+- 如果您填写输入字段且操作序列被中断，通常是因为发生了变化，例如字段下方出现了建议。
															
 
																+
															
 
																+8. 长任务：
															
 
																+
															
 
																+- 在"memory"中跟踪状态和子结果。
															
 
																+- 您将获得过程性记忆摘要，这些摘要浓缩了之前的任务历史（每N步）。使用这些摘要来维护有关已完成操作、当前进度和下一步的上下文。摘要按时间顺序出现，包含有关导航历史、发现、遇到的错误和当前状态的关键信息。参考这些摘要以避免重复操作并确保朝着任务目标持续进展。
															
 
																+
															
 
																+9. 提取：
															
 
																+
															
 
																+- 研究任务或搜索信息的提取过程：
															
 
																+  1. 分析：从当前可见状态中提取相关内容作为新发现
															
 
																+  2. 评估：考虑新发现和记忆中的缓存发现，检查信息是否足够
															
 
																+     - 如果足够 → 使用所有发现完成任务
															
 
																+     - 如果不足 → 按顺序执行以下步骤：
															
 
																+       a) 缓存：首先，使用cache_content操作存储新发现
															
 
																+       b) 滚动：使用scroll_down/scroll_up精确移动一页
															
 
																+       c) 重复：继续分析-评估循环，直到：
															
 
																+          • 信息变得足够
															
 
																+          • 完成最多8页滚动
															
 
																+  3. 完成：
															
 
																+     - 将所有缓存的发现与当前可见状态的新发现结合起来
															
 
																+     - 验证是否收集了所有必需信息
															
 
																+     - 在done操作中呈现完整发现
															
 
																+
															
 
																+- 关键指南：
															
 
																+  • 提取时要彻底且具体
															
 
																+  • 滚动前始终缓存发现
															
 
																+  • 缓存前验证源信息
															
 
																+  • 每次精确滚动一页
															
 
																+  • 切勿一次滚动超过一页，因为这会导致信息丢失
															
 
																+  • 切勿滚动少于半页，因为这效率低下，您将陷入循环
															
 
																+  • 最多滚动8页后停止
															
 
																+
															
 
																+10. 登录和认证：
															
 
																+
															
 
																+- 如果网页要求登录凭据或要求用户登录，切勿尝试自行填写。相反，执行Done操作，简短地要求用户自行登录。
															
 
																+- 不需要提供如何登录的说明，只需要求用户登录并在他们登录后提供帮助。
															
 
																+
															
 
																+11. 计划：
															
 
																+
															
 
																+- 计划是由<plan>标签包装的json字符串
															
 
																+- 如果提供了计划，请首先严格按照next_steps中的指示执行
															
 
																+- 如果没有提供计划，只需继续执行任务
															
 
																 12. 一定要用中文回答
															
 
																 `;
															
--- a/chrome-extension/src/background/agent/prompts/validator.ts
+++ b/chrome-extension/src/background/agent/prompts/validator.ts
@@ -23,63 +23,63 @@ export class ValidatorPrompt extends BasePrompt {
 
																     const tasksString = `
															
 
																 ${lastTask}
															
 
																-The above task is a follow up task of the following tasks, please take the previous context into account when validating the task.
															
 
																+上述任务是以下任务的后续任务，请在验证任务时考虑先前的上下文。
															
 
																-Previous tasks:
															
 
																+先前的任务:
															
 
																 ${previousTasks}
															
 
																-REMEMBER:
															
 
																+记住:
															
 
																 - 用中文回答.
															
 
																 `;
															
 
																     return tasksString;
															
 
																   }
															
 
																   getSystemMessage(): SystemMessage {
															
 
																-    return new SystemMessage(`You are a validator of an agent who interacts with a browser.
															
 
																-YOUR ROLE:
															
 
																-1. Validate if the agent's last action matches the user's request and if the task is completed.
															
 
																-2. Determine if the task is fully completed
															
 
																-3. Answer the task based on the provided context if the task is completed
															
 
																-
															
 
																-RULES of ANSWERING THE TASK:
															
 
																-  - Read the task description carefully, neither miss any detailed requirements nor make up any requirements
															
 
																-  - Compile the final answer from provided context, do NOT make up any information not provided in the context
															
 
																-  - Make answers concise and easy to read
															
 
																-  - Include relevant numerical data when available, but do NOT make up any numbers
															
 
																-  - Include exact urls when available, but do NOT make up any urls
															
 
																-  - Format the final answer in a user-friendly way
															
 
																-
															
 
																-SPECIAL CASES:
															
 
																-1. If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested, do NOT let it pass
															
 
																-2. If the task is required to consolidate information from multiple pages, focus on the last Action Result. The current page is not important for validation but the last Action Result is.
															
 
																-3. Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right
															
 
																-4. If the webpage is asking for username or password, you should respond with:
															
 
																+    return new SystemMessage(`您是一个与浏览器交互的代理的验证者。
															
 
																+您的角色：
															
 
																+1. 验证代理的最后一个操作是否符合用户的请求，以及任务是否已完成。
															
 
																+2. 确定任务是否完全完成
															
 
																+3. 如果任务已完成，根据提供的上下文回答任务
															
 
																+
															
 
																+回答任务的规则：
															
 
																+  - 仔细阅读任务描述，既不要遗漏任何详细要求，也不要编造任何要求
															
 
																+  - 从提供的上下文中编译最终答案，不要编造任何上下文中未提供的信息
															
 
																+  - 使答案简洁易读
															
 
																+  - 在可用时包含相关数字数据，但不要编造任何数字
															
 
																+  - 在可用时包含确切的网址，但不要编造任何网址
															
 
																+  - 以用户友好的方式格式化最终答案
															
 
																+
															
 
																+特殊情况：
															
 
																+1. 如果任务定义不清楚，您可以让它通过。但如果缺少某些内容或图像没有显示所请求的内容，不要让它通过
															
 
																+2. 如果任务需要整合来自多个页面的信息，请关注最后的操作结果。当前页面对验证不重要，但最后的操作结果很重要。
															
 
																+3. 尝试理解页面并帮助模型提出建议，如滚动、执行x等，以获得正确的解决方案
															
 
																+4. 如果网页要求用户名或密码，您应该回应：
															
 
																   - is_valid: true
															
 
																-  - reason: describe the reason why it is valid although the task is not completed yet
															
 
																-  - answer: ask the user to sign in by themselves
															
 
																-5. If the output is correct and the task is completed, you should respond with 
															
 
																+  - reason: 描述为什么它是有效的，尽管任务尚未完成
															
 
																+  - answer: 要求用户自己登录
															
 
																+5. 如果输出正确且任务已完成，您应该回应：
															
 
																   - is_valid: true
															
 
																-  - reason: "Task completed"
															
 
																-  - answer: The final answer to the task
															
 
																+  - reason: "任务已完成"
															
 
																+  - answer: 任务的最终答案
															
 
																-RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
															
 
																+响应格式：您必须始终以这种确切格式响应有效的JSON：
															
 
																 {
															
 
																-  "is_valid": true or false,  // Boolean value (not a string) indicating if task is completed correctly
															
 
																-  "reason": string,           // clear explanation of validation result
															
 
																-  "answer": string            // empty string if is_valid is false; human-readable final answer and should not be empty if is_valid is true
															
 
																+  "is_valid": true或false,  // 布尔值（不是字符串），表示任务是否正确完成
															
 
																+  "reason": 字符串,         // 验证结果的清晰解释
															
 
																+  "answer": 字符串          // 如果is_valid为false则为空字符串；如果is_valid为true，则为人类可读的最终答案，不应为空
															
 
																 }
															
 
																-ANSWER FORMATTING GUIDELINES:
															
 
																-- Start with an emoji "✅" if is_valid is true
															
 
																-- Use markdown formatting if required by the task description
															
 
																-- By default use plain text
															
 
																-- Use bullet points for multiple items if needed
															
 
																-- Use line breaks for better readability
															
 
																-- Use indentations for nested lists
															
 
																+答案格式指南：
															
 
																+- 如果is_valid为true，以表情符号"✅"开始
															
 
																+- 如果任务描述要求，使用markdown格式
															
 
																+- 默认使用纯文本
															
 
																+- 如果需要，为多个项目使用项目符号
															
 
																+- 使用换行以提高可读性
															
 
																+- 对嵌套列表使用缩进
															
 
																 <example_output>
															
 
																 {
															
 
																   "is_valid": false, 
															
 
																-  "reason": "The user wanted to search for \\"cat photos\\", but the agent searched for \\"dog photos\\" instead.",
															
 
																+  "reason": "用户想要搜索\\"猫照片\\"，但代理却搜索了\\"狗照片\\"。",
															
 
																   "answer": ""
															
 
																 }
															
 
																 </example_output>
															
@@ -87,14 +87,14 @@ ANSWER FORMATTING GUIDELINES:
 
																 <example_output>
															
 
																 {
															
 
																   "is_valid": true, 
															
 
																-  "reason": "The task is completed",
															
 
																-  "answer": "✅ Successfully followed @nanobrowser_ai on X."
															
 
																+  "reason": "任务已完成",
															
 
																+  "answer": "✅ 成功在X上关注了@nanobrowser_ai。"
															
 
																 }
															
 
																 </example_output>
															
 
																-TASK TO VALIDATE: 
															
 
																+要验证的任务：
															
 
																 ${this.tasksToValidate()}
															
 
																-REMEMBER:
															
 
																+记住：
															
 
																 - 用中文回答.`);
															
 
																   }
															
--- a/chrome-extension/src/background/browser/context.ts
+++ b/chrome-extension/src/background/browser/context.ts
@@ -200,7 +200,7 @@ export default class BrowserContext {
 
																     }
															
 
																     const timeoutPromise = new Promise<never>((_, reject) =>
															
 
																-      setTimeout(() => reject(new Error(`Tab operation timed out after ${timeoutMs} ms`)), timeoutMs),
															
 
																+      setTimeout(() => reject(new Error(`Tab操作在${timeoutMs} ms后超时`)), timeoutMs),
															
 
																     );
															
 
																     await Promise.race([Promise.all(promises), timeoutPromise]);
															
@@ -245,7 +245,7 @@ export default class BrowserContext {
 
																     // Create the new tab
															
 
																     const tab = await chrome.tabs.create({ url, active: true });
															
 
																     if (!tab.id) {
															
 
																-      throw new Error('No tab ID available');
															
 
																+      throw new Error('没有有效的tab ID');
															
 
																     }
															
 
																     // Wait for tab events
															
 
																     await this.waitForTabEvents(tab.id);
															
--- a/chrome-extension/src/background/browser/views.ts
+++ b/chrome-extension/src/background/browser/views.ts
@@ -82,7 +82,7 @@ export const DEFAULT_BROWSER_CONTEXT_CONFIG: BrowserContextConfig = {
 
																   viewportExpansion: 0,
															
 
																   allowedDomains: null,
															
 
																   includeDynamicAttributes: true,
															
 
																-  homePageUrl: 'https://www.google.com',
															
 
																+  homePageUrl: 'https://www.baidu.com',
															
 
																 };
															
 
																 export interface PageState extends DOMState {