index.ts 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. import 'webextension-polyfill';
  2. import { agentModelStore, AgentNameEnum, generalSettingsStore, llmProviderStore } from '@extension/storage';
  3. import BrowserContext from './browser/context';
  4. import { Executor } from './agent/executor';
  5. import { createLogger } from './log';
  6. import { ExecutionState } from './agent/event/types';
  7. import { createChatModel } from './agent/helper';
  // Module-level state shared by every listener registered in this file.
  const logger = createLogger('background');
  const browserContext = new BrowserContext({});

  // Executor of the most recently started task; stays null until a task is started.
  let currentExecutor: Executor | null = null;
  // Port of the connected side panel; null while no side panel is connected.
  let currentPort: chrome.runtime.Port | null = null;

  // Setup side panel behavior: clicking the extension action opens the panel.
  chrome.sidePanel
    .setPanelBehavior({ openPanelOnActionClick: true })
    .catch(reason => console.error(reason));
  /**
   * Checks whether the buildDomTree script is already injected into a tab.
   *
   * Runs a probe in the tab that tests for an own `buildDomTree` property on
   * `window` and inspects the first injection result.
   *
   * @param tabId - ID of the tab to probe.
   * @returns `true` only when the probe explicitly reported `true`; `false` when
   *   there is no result, the result is anything else, or the probe failed.
   */
  async function isScriptInjected(tabId: number): Promise<boolean> {
    try {
      const results = await chrome.scripting.executeScript({
        target: { tabId },
        func: () => Object.prototype.hasOwnProperty.call(window, 'buildDomTree'),
      });
      // Compare against `true` instead of using `|| false`, so a missing or
      // non-boolean result can never leak out of a Promise<boolean> function.
      return results[0]?.result === true;
    } catch (err) {
      // Best effort: a failed probe is reported as "not injected".
      console.error('Failed to check script injection status:', err);
      return false;
    }
  }
  /**
   * Injects `buildDomTree.js` into the given tab unless it is already present.
   *
   * Failures are logged and swallowed, so this function never rejects.
   *
   * @param tabId - ID of the tab to inject into.
   */
  async function injectBuildDomTree(tabId: number) {
    try {
      // Skip the injection when the script is already in the tab.
      if (await isScriptInjected(tabId)) {
        console.log('Scripts already injected, skipping...');
        return;
      }

      const injection = { target: { tabId }, files: ['buildDomTree.js'] };
      await chrome.scripting.executeScript(injection);
      console.log('Scripts successfully injected');
    } catch (injectionError) {
      console.error('Failed to inject scripts:', injectionError);
    }
  }
  // Inject the DOM tree script whenever an http(s) page finishes loading.
  chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
    const pageLoaded = changeInfo.status === 'complete';
    const isWebPage = tab.url?.startsWith('http');
    if (!tabId || !pageLoaded || !isWebPage) return;

    await injectBuildDomTree(tabId);
  });
  // Listen for debugger detached event.
  // When the detach reason is 'canceled_by_user' and the source carries a tab ID,
  // run browserContext.cleanup().
  chrome.debugger.onDetach.addListener(async (source, reason) => {
    console.log('Debugger detached:', source, reason);

    const canceledByUser = reason === 'canceled_by_user';
    if (canceledByUser && source.tabId) {
      await browserContext.cleanup();
    }
  });
  // Cleanup when tab is closed: drop the closed tab's attached page from the context.
  chrome.tabs.onRemoved.addListener(closedTabId => {
    browserContext.removeAttachedPage(closedTabId);
  });

  // All top-level listeners above are registered at this point.
  logger.info('background loaded');
  // Setup connection listener
  // Serves the connection named 'side-panel-connection': remembers the port so
  // executor events can be forwarded to it, and dispatches incoming messages by
  // their `type` field. Connections with any other name are ignored.
  chrome.runtime.onConnect.addListener(port => {
    if (port.name !== 'side-panel-connection') return;

    currentPort = port;

    port.onMessage.addListener(async message => {
      try {
        switch (message.type) {
          case 'heartbeat':
            // Acknowledge heartbeat
            port.postMessage({ type: 'heartbeat_ack' });
            break;

          case 'new_task': {
            if (!message.task) return port.postMessage({ type: 'error', error: 'No task provided' });
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });

            logger.info('new_task', message.tabId, message.task);
            currentExecutor = await setupExecutor(message.taskId, message.task, browserContext);
            subscribeToExecutorEvents(currentExecutor);

            const result = await currentExecutor.execute();
            logger.info('new_task execution result', message.tabId, result);
            break;
          }

          case 'follow_up_task': {
            if (!message.task) return port.postMessage({ type: 'error', error: 'No follow up task provided' });
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });

            logger.info('follow_up_task', message.tabId, message.task);

            // If executor exists, add follow-up task
            if (currentExecutor) {
              currentExecutor.addFollowUpTask(message.task);
              // Re-subscribe to events in case the previous subscription was cleaned up
              subscribeToExecutorEvents(currentExecutor);
              const result = await currentExecutor.execute();
              logger.info('follow_up_task execution result', message.tabId, result);
            } else {
              // executor was cleaned up, can not add follow-up task
              logger.info('follow_up_task: executor was cleaned up, can not add follow-up task');
              return port.postMessage({ type: 'error', error: 'Executor was cleaned up, can not add follow-up task' });
            }
            break;
          }

          case 'cancel_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to cancel' });
            await currentExecutor.cancel();
            break;
          }

          case 'screenshot': {
            if (!message.tabId) return port.postMessage({ type: 'error', error: 'No tab ID provided' });
            const page = await browserContext.switchTab(message.tabId);
            const screenshot = await page.takeScreenshot();
            logger.info('screenshot', message.tabId, screenshot);
            return port.postMessage({ type: 'success', screenshot });
          }

          case 'resume_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to resume' });
            await currentExecutor.resume();
            return port.postMessage({ type: 'success' });
          }

          case 'pause_task': {
            if (!currentExecutor) return port.postMessage({ type: 'error', error: 'No task to pause' });
            await currentExecutor.pause();
            return port.postMessage({ type: 'success' });
          }

          default:
            return port.postMessage({ type: 'error', error: 'Unknown message type' });
        }
      } catch (error) {
        console.error('Error handling port message:', error);
        try {
          port.postMessage({
            type: 'error',
            error: error instanceof Error ? error.message : 'Unknown error',
          });
        } catch (postError) {
          // postMessage throws once the port is disconnected (for example when the
          // side panel closes mid-task). Without this guard the failure would
          // escape the async listener as an unhandled rejection.
          console.error('Failed to report error to side panel:', postError);
        }
      }
    });

    port.onDisconnect.addListener(() => {
      console.log('Side panel disconnected');
      // Only clear the shared reference if it still points at this port, so a
      // late disconnect of an old port cannot drop a newer connection.
      if (currentPort === port) {
        currentPort = null;
      }
    });
  });
  /**
   * Builds an Executor for a task from the stored provider, agent-model and
   * general settings.
   *
   * The navigator model is required. The planner and validator fall back to the
   * navigator's chat model when they have no model configured.
   *
   * @param taskId - Identifier of the task, passed through to the Executor.
   * @param task - The task text, passed through to the Executor.
   * @param browserContext - Browser context the Executor operates on.
   * @returns The configured Executor.
   * @throws Error when no providers are configured, when an agent model refers to
   *   a provider that is not configured, or when no navigator model is chosen.
   */
  async function setupExecutor(taskId: string, task: string, browserContext: BrowserContext) {
    const providers = await llmProviderStore.getAllProviders();
    // if no providers, need to display the options page
    const hasProviders = Object.keys(providers).length > 0;
    if (!hasProviders) {
      throw new Error('Please configure API keys in the settings first');
    }

    const agentModels = await agentModelStore.getAllAgentModels();
    // verify if every provider used in the agent models exists in the providers
    const orphanModel = Object.values(agentModels).find(model => !providers[model.provider]);
    if (orphanModel) {
      throw new Error(`Provider ${orphanModel.provider} not found in the settings`);
    }

    const navigatorModel = agentModels[AgentNameEnum.Navigator];
    if (!navigatorModel) {
      throw new Error('Please choose a model for the navigator in the settings first');
    }
    const navigatorLLM = createChatModel(providers[navigatorModel.provider], navigatorModel);

    // Planner and validator are optional; null means "reuse the navigator model".
    const plannerModel = agentModels[AgentNameEnum.Planner];
    const plannerLLM = plannerModel ? createChatModel(providers[plannerModel.provider], plannerModel) : null;

    const validatorModel = agentModels[AgentNameEnum.Validator];
    const validatorLLM = validatorModel
      ? createChatModel(providers[validatorModel.provider], validatorModel)
      : null;

    const settings = await generalSettingsStore.getSettings();
    const agentOptions = {
      maxSteps: settings.maxSteps,
      maxFailures: settings.maxFailures,
      maxActionsPerStep: settings.maxActionsPerStep,
      useVision: settings.useVision,
      useVisionForPlanner: settings.useVisionForPlanner,
      planningInterval: settings.planningInterval,
    };

    return new Executor(task, taskId, browserContext, navigatorLLM, {
      plannerLLM: plannerLLM ?? navigatorLLM,
      validatorLLM: validatorLLM ?? navigatorLLM,
      agentOptions,
    });
  }
  /**
   * Forwards an executor's execution events to the connected side panel and
   * cleans the executor up once its task reaches a terminal state.
   *
   * Any previous subscription on the executor is cleared first, so calling this
   * repeatedly for the same executor does not stack listeners.
   *
   * @param executor - The executor whose events should be forwarded.
   */
  async function subscribeToExecutorEvents(executor: Executor) {
    // Clear previous event listeners to prevent multiple subscriptions
    executor.clearExecutionEvents();

    // Subscribe to new events
    executor.subscribeExecutionEvents(async event => {
      try {
        // The port is read at event time, so events go to whichever side panel
        // is connected now; they are dropped when none is.
        currentPort?.postMessage(event);
      } catch (error) {
        logger.error('Failed to send message to side panel:', error);
      }

      const taskFinished =
        event.state === ExecutionState.TASK_OK ||
        event.state === ExecutionState.TASK_FAIL ||
        event.state === ExecutionState.TASK_CANCEL;

      if (taskFinished) {
        // Clean up the executor that emitted this event, not the module-level
        // `currentExecutor`, which may already point at a newer task's executor.
        await executor.cleanup();
      }
    });
  }
  207. }