123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349 |
- import { z } from 'zod';
- import { BaseAgent, type BaseAgentOptions, type ExtraAgentOptions } from './base';
- import { createLogger } from '@src/background/log';
- import { ActionResult, type AgentOutput } from '../types';
- import type { Action } from '../actions/builder';
- import { buildDynamicActionSchema } from '../actions/builder';
- import { agentBrainSchema } from '../types';
- import { type BaseMessage, HumanMessage } from '@langchain/core/messages';
- import { Actors, ExecutionState } from '../event/types';
- import {
- ChatModelAuthError,
- ChatModelForbiddenError,
- isAuthenticationError,
- isForbiddenError,
- LLM_FORBIDDEN_ERROR_MESSAGE,
- } from './errors';
- import { jsonNavigatorOutputSchema } from '../actions/json_schema';
- import { geminiNavigatorOutputSchema } from '../actions/json_gemini';
- import { calcBranchPathHashSet } from '@src/background/dom/views';
/** Logger for this module, created under the name 'NavigatorAgent'. */
const logger = createLogger('NavigatorAgent');
/**
 * Name-indexed collection of the actions the navigator is allowed to execute.
 *
 * Actions live in a `Map` rather than a plain object: action names come from
 * model output, and a plain-object lookup for a name such as `constructor` or
 * `toString` would return an inherited `Object.prototype` member instead of
 * `undefined`.
 */
export class NavigatorActionRegistry {
  private readonly actions = new Map<string, Action>();

  /**
   * @param actions - Actions to register up front. When two actions share a
   *   name, the later one replaces the earlier one.
   */
  constructor(actions: Action[]) {
    for (const action of actions) {
      this.registerAction(action);
    }
  }

  /** Registers `action` under the name reported by `action.name()`, replacing any previous entry. */
  registerAction(action: Action): void {
    this.actions.set(action.name(), action);
  }

  /** Removes the action registered under `name`; does nothing when there is none. */
  unregisterAction(name: string): void {
    this.actions.delete(name);
  }

  /**
   * @returns The action registered under `name`, or `undefined` when no such
   *   action was registered.
   */
  getAction(name: string): Action | undefined {
    return this.actions.get(name);
  }

  /**
   * Builds the zod schema for the model output: a `current_state` entry plus
   * an `action` array whose items follow the schema derived from the
   * currently registered actions.
   */
  setupModelOutputSchema(): z.ZodType {
    const actionSchema = buildDynamicActionSchema([...this.actions.values()]);
    return z.object({
      current_state: agentBrainSchema,
      action: z.array(actionSchema),
    });
  }
}
/** Result payload produced by one navigator step. */
export interface NavigatorResult {
  /** `true` when the final action result of the step was flagged as done. */
  done: boolean;
}
/**
 * Agent that asks the chat model which browser actions to take next and then
 * executes them through the action registry.
 *
 * One `execute()` call is one step: add the browser state to the message
 * history, invoke the model, record the model output, run the returned
 * actions and report whether the last action flagged the task as done.
 */
export class NavigatorAgent extends BaseAgent<z.ZodType, NavigatorResult> {
  private actionRegistry: NavigatorActionRegistry;
  private jsonSchema: Record<string, unknown>;

  /**
   * @param actionRegistry - Registry providing the executable actions and the zod output schema.
   * @param options - Base agent options, forwarded to `BaseAgent`.
   * @param extraOptions - Optional extra options; the agent id is always forced to `'navigator'`.
   */
  constructor(
    actionRegistry: NavigatorActionRegistry,
    options: BaseAgentOptions,
    extraOptions?: Partial<ExtraAgentOptions>,
  ) {
    super(actionRegistry.setupModelOutputSchema(), options, { ...extraOptions, id: 'navigator' });
    this.actionRegistry = actionRegistry;
    // Models whose name starts with 'gemini' get their own JSON schema variant.
    this.jsonSchema = this.modelName.startsWith('gemini') ? geminiNavigatorOutputSchema : jsonNavigatorOutputSchema;
    // logger.info('Navigator zod schema', JSON.stringify(zodToJsonSchema(this.modelOutputSchema), null, 2));
  }

  /**
   * Calls the chat model with structured output and returns the parsed result.
   *
   * When the model returns no parsed content but the raw message carries tool
   * calls, the arguments of the first tool call are used instead.
   *
   * @param inputMessages - Messages to send to the model.
   * @returns The model output (`current_state` plus `action` list).
   * @throws Error when structured output is not enabled, when the model call
   *   fails, or when neither parsed content nor a tool call is available.
   */
  async invoke(inputMessages: BaseMessage[]): Promise<this['ModelOutput']> {
    if (!this.withStructuredOutput) {
      throw new Error('Navigator needs to work with LLM that supports tool calling');
    }

    const structuredLlm = this.chatLLM.withStructuredOutput(this.jsonSchema, {
      includeRaw: true,
      name: this.modelOutputToolName,
    });

    let response = undefined;
    try {
      response = await structuredLlm.invoke(inputMessages, {
        ...this.callOptions,
      });
      if (response.parsed) {
        return response.parsed;
      }
    } catch (error) {
      const errorMessage = `Failed to invoke ${this.modelName} with structured output: ${error}`;
      throw new Error(errorMessage);
    }

    // Type assertion so the tool-call arguments of the raw message can be read.
    const rawResponse = response.raw as BaseMessage & {
      tool_calls?: Array<{
        args: {
          current_state?: typeof agentBrainSchema._type;
          currentState?: typeof agentBrainSchema._type;
          action: z.infer<ReturnType<typeof buildDynamicActionSchema>>;
        };
      }>;
    };

    // Sometimes the LLM returns empty content together with one or more tool calls.
    if (rawResponse.tool_calls && rawResponse.tool_calls.length > 0) {
      logger.info('Navigator structuredLlm tool call with empty content', rawResponse.tool_calls);
      // Only the first tool call is used.
      const toolCall = rawResponse.tool_calls[0];
      return {
        // The output schema names this key `current_state`; the camelCase
        // spelling is still accepted for callers/models that produce it.
        current_state: toolCall.args.current_state ?? toolCall.args.currentState,
        action: [...toolCall.args.action],
      };
    }

    throw new Error('Could not parse response');
  }

  /**
   * Runs one navigation step.
   *
   * Emits `STEP_START`, then `STEP_OK`, `STEP_FAIL` or `STEP_CANCEL` depending
   * on the outcome. The step is cancelled whenever the context is paused or
   * stopped at one of the checkpoints.
   *
   * @returns The agent output: `result.done` on success, `error` on failure,
   *   neither when the step was cancelled.
   * @throws ChatModelAuthError when the failure is an authentication error.
   * @throws ChatModelForbiddenError when the failure is a forbidden error.
   */
  async execute(): Promise<AgentOutput<NavigatorResult>> {
    const agentOutput: AgentOutput<NavigatorResult> = {
      id: this.id,
    };
    let cancelled = false;

    try {
      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.STEP_START, 'Navigating...');

      const messageManager = this.context.messageManager;
      // Add the browser state message.
      await this.addStateMessageToMemory();

      if (this.context.paused || this.context.stopped) {
        cancelled = true;
        return agentOutput;
      }

      // Ask the model which actions to take.
      const inputMessages = messageManager.getMessages();
      const modelOutput = await this.invoke(inputMessages);

      if (this.context.paused || this.context.stopped) {
        cancelled = true;
        return agentOutput;
      }

      // Replace the state message with the model output. Both helpers are
      // async, so they are awaited to route any failure into the catch below.
      await this.removeLastStateMessageFromMemory();
      await this.addModelOutputToMemory(modelOutput);

      // Take the actions.
      const actionResults = await this.doMultiAction(modelOutput);
      this.context.actionResults = actionResults;

      if (this.context.paused || this.context.stopped) {
        cancelled = true;
        return agentOutput;
      }

      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.STEP_OK, 'Navigation done');

      let done = false;
      if (actionResults.length > 0 && actionResults[actionResults.length - 1].isDone) {
        done = true;
      }
      agentOutput.result = { done };
      return agentOutput;
    } catch (error) {
      await this.removeLastStateMessageFromMemory();

      // Authentication and permission problems are surfaced as dedicated error types.
      if (isAuthenticationError(error)) {
        throw new ChatModelAuthError('Navigator API Authentication failed. Please verify your API key', error);
      }
      if (isForbiddenError(error)) {
        throw new ChatModelForbiddenError(LLM_FORBIDDEN_ERROR_MESSAGE, error);
      }

      const errorMessage = error instanceof Error ? error.message : String(error);
      const errorString = `Navigation failed: ${errorMessage}`;
      logger.error(errorString);
      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.STEP_FAIL, errorString);
      agentOutput.error = errorMessage;
      return agentOutput;
    } finally {
      // A cancelled step drops its state message and reports the cancellation.
      if (cancelled) {
        await this.removeLastStateMessageFromMemory();
        this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.STEP_CANCEL, 'Navigation cancelled');
      }
    }
  }

  /**
   * Adds the current state message to the message history.
   *
   * Does nothing when a state message is already present. Before the state
   * message is added, every previous action result marked `includeInMemory`
   * has its extracted content and/or the last line of its error appended as a
   * human message, and is then replaced by an empty `ActionResult` so it is
   * not added again.
   */
  public async addStateMessageToMemory(): Promise<void> {
    if (this.context.stateMessageAdded) {
      return;
    }

    const messageManager = this.context.messageManager;

    for (const [index, r] of this.context.actionResults.entries()) {
      if (!r.includeInMemory) {
        continue;
      }
      if (r.extractedContent) {
        const msg = new HumanMessage(`Action result: ${r.extractedContent}`);
        // logger.info('Adding action result to memory', msg.content);
        messageManager.addMessageWithTokens(msg);
      }
      if (r.error) {
        // Keep only the last line of the error text.
        const errorText = r.error.toString().trim();
        const lastLine = errorText.split('\n').pop() || '';
        const msg = new HumanMessage(`Action error: ${lastLine}`);
        logger.info('Adding action error to memory', msg.content);
        messageManager.addMessageWithTokens(msg);
      }
      // Reset this action result so it is not added again in the state message.
      // NOTE: in python version, all action results are reset to empty, but in ts version,
      // only those included in memory are reset to empty
      this.context.actionResults[index] = new ActionResult();
    }

    const state = await this.prompt.getUserMessage(this.context);
    messageManager.addStateMessage(state);
    this.context.stateMessageAdded = true;
  }

  /**
   * Removes the last state message from the message history; a no-op when no
   * state message is currently recorded as added.
   */
  protected async removeLastStateMessageFromMemory(): Promise<void> {
    if (!this.context.stateMessageAdded) return;
    const messageManager = this.context.messageManager;
    messageManager.removeLastStateMessage();
    this.context.stateMessageAdded = false;
  }

  /** Appends the model output to the message history. */
  private async addModelOutputToMemory(modelOutput: this['ModelOutput']): Promise<void> {
    const messageManager = this.context.messageManager;
    messageManager.addModelOutput(modelOutput);
  }

  /**
   * Executes the actions contained in a model output, in order.
   *
   * `response.action` is normalised first: an array is used after dropping
   * null/undefined entries, a string is parsed as JSON (and must yield an
   * object or array), anything else is treated as a single action object.
   *
   * Execution stops early when the context is paused or stopped, or when an
   * action after the first one needs an element index while the page contains
   * element paths that were not present before the first action ran.
   *
   * @param response - Model output whose `action` entry lists the actions.
   * @returns One `ActionResult` per executed (or failed) action.
   * @throws Error when a string action cannot be parsed into an object or
   *   array, or when more than three actions fail.
   */
  private async doMultiAction(response: this['ModelOutput']): Promise<ActionResult[]> {
    const results: ActionResult[] = [];
    let errCount = 0;

    logger.info('Actions', response.action);

    let actions: Record<string, unknown>[] = [];
    let candidate: unknown = response.action;

    // Sometimes response.action is a JSON string rather than the expected array.
    if (typeof candidate === 'string') {
      logger.warning('Unexpected action format', response.action);
      try {
        candidate = JSON.parse(candidate);
      } catch {
        logger.error('Invalid action format', response.action);
        throw new Error('Invalid action output format');
      }
      // JSON such as `"click"`, `5` or `null` parses fine but is not an action list.
      if (typeof candidate !== 'object' || candidate === null) {
        logger.error('Invalid action format', response.action);
        throw new Error('Invalid action output format');
      }
    }

    if (Array.isArray(candidate)) {
      // Skip empty entries.
      actions = candidate.filter((item: unknown) => item !== null && item !== undefined);
      if (actions.length === 0) {
        logger.warning('No valid actions found', response.action);
      }
    } else {
      // Neither an array nor a string: treat it as a single action object.
      actions = [candidate as Record<string, unknown>];
    }

    const browserContext = this.context.browserContext;
    const browserState = await browserContext.getState();
    const cachedPathHashes = await calcBranchPathHashSet(browserState);

    await browserContext.removeHighlight();

    for (const [i, action] of actions.entries()) {
      // Each action object carries its name as the first key and its arguments as that key's value.
      const actionName = Object.keys(action)[0];
      const actionArgs = action[actionName];
      try {
        if (this.context.paused || this.context.stopped) {
          return results;
        }

        const actionInstance = this.actionRegistry.getAction(actionName);
        if (actionInstance === undefined) {
          throw new Error(`Action ${actionName} not exists`);
        }

        const indexArg = actionInstance.getIndexArg(actionArgs);
        if (i > 0 && indexArg !== null) {
          const newState = await browserContext.getState();
          const newPathHashes = await calcBranchPathHashSet(newState);
          // The next action requires an index but there are new elements on the page.
          if (!newPathHashes.isSubsetOf(cachedPathHashes)) {
            const msg = `Something new appeared after action ${i} / ${actions.length}`;
            logger.info(msg);
            results.push(
              new ActionResult({
                extractedContent: msg,
                includeInMemory: true,
              }),
            );
            break;
          }
        }

        const result = await actionInstance.call(actionArgs);
        if (result === undefined) {
          throw new Error(`Action ${actionName} returned undefined`);
        }
        results.push(result);

        if (this.context.paused || this.context.stopped) {
          return results;
        }
        // TODO: wait for 1 second for now, need to optimize this to avoid unnecessary waiting
        await new Promise(resolve => setTimeout(resolve, 1000));
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        logger.error('doAction error', actionName, actionArgs, errorMessage);
        // Unexpected error: report it and record it as a failed action result.
        this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_FAIL, errorMessage);
        errCount++;
        if (errCount > 3) {
          throw new Error('Too many errors in actions');
        }
        results.push(
          new ActionResult({
            error: errorMessage,
            isDone: false,
            includeInMemory: true,
          }),
        );
      }
    }

    return results;
  }
}
|