// index.ts
  1. import 'webextension-polyfill';
  2. import { agentModelStore, AgentNameEnum, generalSettingsStore, llmProviderStore } from '@extension/storage';
  3. import BrowserContext from './browser/context';
  4. import { Executor } from './agent/executor';
  5. import { createLogger } from './log';
  6. import { ExecutionState } from './agent/event/types';
  7. import { createChatModel } from './agent/helper';
  8. import { BaseChatModel } from '@langchain/core/language_models/chat_models';
  // Module-wide state shared by the listeners registered in this file.
  const logger = createLogger('background');
  const browserContext = new BrowserContext({});
  // Executor of the most recently started task; null until a task has been created.
  let currentExecutor: Executor | null = null;
  // Port of the connected side panel; null while no side panel is connected.
  let currentPort: chrome.runtime.Port | null = null;

  // Make a click on the extension action open the side panel; a failure is only logged.
  chrome.sidePanel
    .setPanelBehavior({ openPanelOnActionClick: true })
    .catch((error: unknown) => {
      console.error(error);
    });
  /**
   * Reports whether `buildDomTree` is already present in a tab.
   *
   * Runs a small probe in the tab that checks whether `window` has an own
   * property named `buildDomTree`.
   *
   * @param tabId - ID of the tab to probe.
   * @returns The probe's result for the first returned entry, or `false` when
   *   that result is missing/falsy or the probe itself fails (the failure is
   *   logged, not rethrown).
   */
  async function isScriptInjected(tabId: number): Promise<boolean> {
    try {
      const probeOutcomes = await chrome.scripting.executeScript({
        target: { tabId },
        func: () => Object.prototype.hasOwnProperty.call(window, 'buildDomTree'),
      });
      const [firstOutcome] = probeOutcomes;
      return firstOutcome?.result || false;
    } catch (probeError) {
      console.error('Failed to check script injection status:', probeError);
      return false;
    }
  }
  /**
   * Injects `buildDomTree.js` into a tab unless `isScriptInjected` reports
   * that it is already there.
   *
   * Any error raised along the way is logged and swallowed, so this function
   * does not reject.
   *
   * @param tabId - ID of the tab that should receive the script.
   */
  async function injectBuildDomTree(tabId: number) {
    try {
      // Skip the injection when the tab already has the script.
      if (await isScriptInjected(tabId)) {
        console.log('Scripts already injected, skipping...');
        return;
      }

      await chrome.scripting.executeScript({
        target: { tabId },
        files: ['buildDomTree.js'],
      });
      console.log('Scripts successfully injected');
    } catch (injectionError) {
      console.error('Failed to inject scripts:', injectionError);
    }
  }
  // Inject the DOM-tree script once a tab whose URL starts with 'http' reports status 'complete'.
  chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
    const finishedLoading = changeInfo.status === 'complete';
    const hasHttpUrl = tab.url?.startsWith('http');
    if (!tabId || !finishedLoading || !hasHttpUrl) {
      return;
    }
    await injectBuildDomTree(tabId);
  });
  // Log every debugger detach. When the detach reason is 'canceled_by_user'
  // and the source carries a tab ID, clean up the browser context.
  chrome.debugger.onDetach.addListener(async (source, reason) => {
    console.log('Debugger detached:', source, reason);
    const canceledByUser = reason === 'canceled_by_user';
    if (canceledByUser && source.tabId) {
      await browserContext.cleanup();
    }
  });
  // When a tab is closed, remove its attached page from the browser context.
  chrome.tabs.onRemoved.addListener((closedTabId: number): void => {
    browserContext.removeAttachedPage(closedTabId);
  });

  logger.info('background loaded');
  /**
   * Connection listener for the side panel.
   *
   * Only ports named 'side-panel-connection' are handled. The connected port
   * is remembered in `currentPort`, and each incoming message is dispatched
   * on its `type`:
   *  - 'heartbeat'      replies with 'heartbeat_ack'
   *  - 'new_task'       builds a new executor and runs the task
   *  - 'follow_up_task' adds a task to the existing executor and runs it
   *  - 'cancel_task' / 'pause_task' / 'resume_task' control the current executor
   *  - 'screenshot'     switches to the given tab and returns a screenshot
   * Anything else, and any thrown error, is answered with a `{ type: 'error' }` message.
   */
  chrome.runtime.onConnect.addListener(port => {
    if (port.name !== 'side-panel-connection') {
      return;
    }

    currentPort = port;

    port.onMessage.addListener(async message => {
      try {
        switch (message.type) {
          case 'heartbeat':
            // Acknowledge heartbeat
            port.postMessage({ type: 'heartbeat_ack' });
            break;

          case 'new_task': {
            if (!message.task) return port.postMessage({ type: 'error', error: 'No task provided' });
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });

            logger.info('new_task', message.tabId, message.task);
            currentExecutor = await setupExecutor(message.taskId, message.task, browserContext);
            subscribeToExecutorEvents(currentExecutor);

            const result = await currentExecutor.execute();
            logger.info('new_task execution result', message.tabId, result);
            break;
          }

          case 'follow_up_task': {
            if (!message.task) return port.postMessage({ type: 'error', error: 'No follow up task provided' });
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });

            logger.info('follow_up_task', message.tabId, message.task);

            // If executor exists, add follow-up task
            if (currentExecutor) {
              currentExecutor.addFollowUpTask(message.task);
              // Re-subscribe to events in case the previous subscription was cleaned up
              subscribeToExecutorEvents(currentExecutor);
              const result = await currentExecutor.execute();
              logger.info('follow_up_task execution result', message.tabId, result);
            } else {
              // executor was cleaned up, can not add follow-up task
              logger.info('follow_up_task: executor was cleaned up, can not add follow-up task');
              return port.postMessage({ type: 'error', error: 'Executor was cleaned up, can not add follow-up task' });
            }
            break;
          }

          case 'cancel_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to cancel' });
            await currentExecutor.cancel();
            break;
          }

          case 'screenshot': {
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });
            const page = await browserContext.switchTab(message.tabId);
            const screenshot = await page.takeScreenshot();
            logger.info('screenshot', message.tabId, screenshot);
            return port.postMessage({ type: 'success', screenshot });
          }

          case 'resume_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to resume' });
            await currentExecutor.resume();
            return port.postMessage({ type: 'success' });
          }

          case 'pause_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to pause' });
            await currentExecutor.pause();
            return port.postMessage({ type: 'success' });
          }

          default:
            return port.postMessage({ type: 'error', error: 'Unknown message type' });
        }
      } catch (error) {
        console.error('Error handling port message:', error);
        try {
          port.postMessage({
            type: 'error',
            error: error instanceof Error ? error.message : 'Unknown error',
          });
        } catch (postError) {
          // The port can be disconnected while a task is still awaiting; posting
          // to it then throws. Log instead of letting this async listener reject
          // with nobody to handle it.
          console.error('Failed to report error to side panel:', postError);
        }
      }
    });

    port.onDisconnect.addListener(() => {
      console.log('Side panel disconnected');
      // Only forget the port if it is still the active one; a newer connection
      // may already have replaced it and must not be cleared by a stale disconnect.
      if (currentPort === port) {
        currentPort = null;
      }
    });
  });
  /**
   * Builds an `Executor` for a task from the stored provider, agent-model and
   * general settings.
   *
   * The navigator model is required. The planner and validator models are
   * optional; when one is not configured, the navigator LLM is used in its place.
   *
   * @param taskId - Identifier passed through to the executor.
   * @param task - Task text passed through to the executor.
   * @param browserContext - Browser context the executor will operate on.
   * @returns The newly constructed executor.
   * @throws Error when no providers are configured, when an agent model refers
   *   to a provider that is not in the settings, or when no navigator model is chosen.
   */
  async function setupExecutor(taskId: string, task: string, browserContext: BrowserContext) {
    const providers = await llmProviderStore.getAllProviders();
    // Without any provider there is nothing to build a model from.
    if (Object.keys(providers).length === 0) {
      throw new Error('Please configure API keys in the settings first');
    }

    const agentModels = await agentModelStore.getAllAgentModels();
    // Every configured agent model must point at a known provider.
    for (const { provider } of Object.values(agentModels)) {
      if (!providers[provider]) {
        throw new Error(`Provider ${provider} not found in the settings`);
      }
    }

    const navigatorModel = agentModels[AgentNameEnum.Navigator];
    if (!navigatorModel) {
      throw new Error('Please choose a model for the navigator in the settings first');
    }
    const navigatorLLM = createChatModel(providers[navigatorModel.provider], navigatorModel);

    const plannerModel = agentModels[AgentNameEnum.Planner];
    const plannerLLM: BaseChatModel | null = plannerModel
      ? createChatModel(providers[plannerModel.provider], plannerModel)
      : null;

    const validatorModel = agentModels[AgentNameEnum.Validator];
    const validatorLLM: BaseChatModel | null = validatorModel
      ? createChatModel(providers[validatorModel.provider], validatorModel)
      : null;

    const generalSettings = await generalSettingsStore.getSettings();
    const { maxSteps, maxFailures, maxActionsPerStep, useVision, useVisionForPlanner, planningInterval } =
      generalSettings;

    return new Executor(task, taskId, browserContext, navigatorLLM, {
      plannerLLM: plannerLLM ?? navigatorLLM,
      validatorLLM: validatorLLM ?? navigatorLLM,
      agentOptions: {
        maxSteps,
        maxFailures,
        maxActionsPerStep,
        useVision,
        useVisionForPlanner,
        planningInterval,
      },
    });
  }
  /**
   * Forwards an executor's execution events to the side panel and cleans the
   * executor up once its task reaches a terminal state.
   *
   * Existing execution-event listeners on the executor are cleared first so
   * that repeated calls do not stack subscriptions.
   *
   * @param executor - Executor whose events should be forwarded.
   */
  async function subscribeToExecutorEvents(executor: Executor) {
    // Clear previous event listeners to prevent multiple subscriptions
    executor.clearExecutionEvents();

    // Subscribe to new events
    executor.subscribeExecutionEvents(async event => {
      try {
        // Events are dropped while no side panel is connected.
        if (currentPort) {
          currentPort.postMessage(event);
        }
      } catch (error) {
        logger.error('Failed to send message to side panel:', error);
      }

      const reachedTerminalState =
        event.state === ExecutionState.TASK_OK ||
        event.state === ExecutionState.TASK_FAIL ||
        event.state === ExecutionState.TASK_CANCEL;

      if (reachedTerminalState) {
        // Clean up the executor that emitted this event. The module-level
        // current executor may already belong to a newer task and must not
        // be cleaned up on behalf of an older one.
        await executor.cleanup();
      }
    });
  }