base.ts 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import { HumanMessage, type SystemMessage } from '@langchain/core/messages';
  2. import type { AgentContext } from '@src/background/agent/types';
  3. import { createLogger } from '@src/background/log';
  // Module-scoped logger created with the 'agent/prompts/base' tag; nothing in the visible part of this file references it.
  4. const logger = createLogger('agent/prompts/base');
  5. /**
  6. * Abstract base class for all prompt types
  7. */
  8. abstract class BasePrompt {
  9. /**
  10. * Returns the system message that defines the AI's role and behavior
  11. * @returns SystemMessage from LangChain
  12. */
  13. abstract getSystemMessage(): SystemMessage;
  14. /**
  15. * Returns the user message for the specific prompt type
  16. * @param context - Optional context data needed for generating the user message
  17. * @returns HumanMessage from LangChain
  18. */
  19. abstract getUserMessage(context: AgentContext): Promise<HumanMessage>;
  20. /**
  21. * Builds the user message containing the browser state
  22. * @param context - The agent context
  23. * @returns HumanMessage from LangChain
  24. */
  25. async buildBrowserStateUserMessage(context: AgentContext): Promise<HumanMessage> {
  26. const browserState = await context.browserContext.getState();
  27. const elementsText = browserState.elementTree.clickableElementsToString(context.options.includeAttributes);
  28. const hasContentAbove = (browserState.pixelsAbove || 0) > 0;
  29. const hasContentBelow = (browserState.pixelsBelow || 0) > 0;
  30. let formattedElementsText = '';
  31. if (elementsText !== '') {
  32. if (hasContentAbove) {
  33. // formattedElementsText = `... ${browserState.pixelsAbove} pixels above - scroll up or extract content to see more ...\n${elementsText}`;
  34. formattedElementsText = `... ${browserState.pixelsAbove} pixels above - scroll up to see more ...\n${elementsText}`;
  35. } else {
  36. formattedElementsText = `[Start of page]\n${elementsText}`;
  37. }
  38. if (hasContentBelow) {
  39. // formattedElementsText = `${formattedElementsText}\n... ${browserState.pixelsBelow} pixels below - scroll down or extract content to see more ...`;
  40. formattedElementsText = `${formattedElementsText}\n... ${browserState.pixelsBelow} pixels below - scroll down to see more ...`;
  41. } else {
  42. formattedElementsText = `${formattedElementsText}\n[End of page]`;
  43. }
  44. } else {
  45. formattedElementsText = 'empty page';
  46. }
  47. let stepInfoDescription = '';
  48. if (context.stepInfo) {
  49. stepInfoDescription = `Current step: ${context.stepInfo.stepNumber + 1}/${context.stepInfo.maxSteps}`;
  50. }
  51. const timeStr = new Date().toISOString().slice(0, 16).replace('T', ' '); // Format: YYYY-MM-DD HH:mm
  52. stepInfoDescription += `Current date and time: ${timeStr}`;
  53. let actionResultsDescription = '';
  54. if (context.actionResults.length > 0) {
  55. for (let i = 0; i < context.actionResults.length; i++) {
  56. const result = context.actionResults[i];
  57. if (result.extractedContent) {
  58. actionResultsDescription += `\nAction result ${i + 1}/${context.actionResults.length}: ${result.extractedContent}`;
  59. }
  60. if (result.error) {
  61. // only use last line of error
  62. const error = result.error.split('\n').pop();
  63. actionResultsDescription += `\nAction error ${i + 1}/${context.actionResults.length}: ...${error}`;
  64. }
  65. }
  66. }
  67. const currentTab = `{id: ${browserState.tabId}, url: ${browserState.url}, title: ${browserState.title}}`;
  68. const otherTabs = browserState.tabs
  69. .filter(tab => tab.id !== browserState.tabId)
  70. .map(tab => `- {id: ${tab.id}, url: ${tab.url}, title: ${tab.title}}`);
  71. const stateDescription = `
  72. [任务历史记忆结束]
  73. [当前状态开始]
  74. 以下是一次性信息 - 如果您需要记住它,请将其写入内存:
  75. 当前标签页: ${currentTab}
  76. 其他可用标签页:
  77. ${otherTabs.join('\n')}
  78. 当前页面视口内顶层的交互元素:
  79. ${formattedElementsText}
  80. ${stepInfoDescription}
  81. ${actionResultsDescription}
  82. `;
  83. if (browserState.screenshot && context.options.useVision) {
  84. return new HumanMessage({
  85. content: [
  86. { type: 'text', text: stateDescription },
  87. {
  88. type: 'image_url',
  89. image_url: { url: `data:image/jpeg;base64,${browserState.screenshot}` },
  90. },
  91. ],
  92. });
  93. }
  94. return new HumanMessage(stateDescription);
  95. }
  96. }
  97. export { BasePrompt };