schemas.ts 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. import { z } from 'zod';
  /**
   * Shape shared by every action definition in this file: a name, a
   * description string, and a zod type.
   */
  export interface ActionSchema {
    /** Identifier of the action (for example 'done' or 'go_to_url'). */
    name: string;
    /** Free-text explanation of the action. */
    description: string;
    /** Zod type attached to the action; the definitions in this file all use `z.object(...)`. */
    schema: z.ZodType;
  }
  /**
   * Definition of the `done` action.
   * Its schema is an object with one required string field, `text`.
   */
  export const doneActionSchema: ActionSchema = {
    name: 'done',
    description: 'Complete task',
    schema: z.object({ text: z.string() }),
  };
  14. // Basic Navigation Actions
  /**
   * Definition of the `search_google` action.
   * Its schema is an object with one required string field, `query`.
   */
  export const searchGoogleActionSchema: ActionSchema = {
    name: 'search_google',
    description: 'Search Google in the current tab',
    schema: z.object({ query: z.string() }),
  };
  /**
   * Definition of the `go_to_url` action.
   * Its schema is an object with one required string field, `url`; the
   * string is not validated as a URL at this level.
   */
  export const goToUrlActionSchema: ActionSchema = {
    name: 'go_to_url',
    description: 'Navigate to URL in the current tab',
    schema: z.object({ url: z.string() }),
  };
  /**
   * Definition of the `go_back` action.
   * Its schema is an empty object: the action declares no fields.
   */
  export const goBackActionSchema: ActionSchema = {
    name: 'go_back',
    description: 'Go back to the previous page',
    schema: z.object({}),
  };
  /**
   * Definition of the `click_element` action.
   * Fields: optional `desc`, required numeric `index`, and `xpath`, which
   * may be a string, omitted, or null.
   */
  export const clickElementActionSchema: ActionSchema = {
    name: 'click_element',
    description: 'Click element',
    schema: z.object({
      // Some small LLMs cannot generate a description, so this is optional
      // (but it is still marked as required in the JSON schema).
      desc: z.string().optional(),
      index: z.number(),
      xpath: z.string().optional().nullable(),
    }),
  };
  /**
   * Definition of the `input_text` action.
   * Fields: optional `desc`, required numeric `index`, required string
   * `text`, and `xpath`, which may be a string, omitted, or null.
   */
  export const inputTextActionSchema: ActionSchema = {
    name: 'input_text',
    description: 'Input text into an interactive input element',
    schema: z.object({
      desc: z.string().optional(),
      index: z.number(),
      text: z.string(),
      xpath: z.string().optional().nullable(),
    }),
  };
  53. // Tab Management Actions
  /**
   * Definition of the `switch_tab` action.
   * Its schema is an object with one required numeric field, `tab_id`.
   */
  export const switchTabActionSchema: ActionSchema = {
    name: 'switch_tab',
    description: 'Switch to tab by id',
    schema: z.object({ tab_id: z.number() }),
  };
  /**
   * Definition of the `open_tab` action.
   * Its schema is an object with one required string field, `url`.
   */
  export const openTabActionSchema: ActionSchema = {
    name: 'open_tab',
    description: 'Open URL in new tab',
    schema: z.object({ url: z.string() }),
  };
  68. // Content Actions
  /**
   * Definition of the `extract_content` action.
   * Its schema is an object with one required string field, `goal`.
   */
  export const extractContentActionSchema: ActionSchema = {
    name: 'extract_content',
    description:
      'Extract page content to retrieve specific information from the page, e.g. all company names, a specifc description, all information about, links with companies in structured format or simply links',
    schema: z.object({ goal: z.string() }),
  };
  77. // Cache Actions
  /**
   * Definition of the `cache_content` action.
   * Its schema is an object with one required string field, `content`.
   */
  export const cacheContentActionSchema: ActionSchema = {
    name: 'cache_content',
    description: 'Cache the extracted content of the page',
    schema: z.object({ content: z.string() }),
  };
  /**
   * Definition of the `scroll_down` action.
   * Fields: optional `desc`, and `amount`, which may be a number, omitted,
   * or null. The description string states that a missing amount means
   * one page; that behavior is not implemented in this file.
   */
  export const scrollDownActionSchema: ActionSchema = {
    name: 'scroll_down',
    description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
    schema: z.object({
      desc: z.string().optional(),
      amount: z.number().optional().nullable(),
    }),
  };
  /**
   * Definition of the `scroll_up` action.
   * Fields: optional `desc`, and `amount`, which may be a number, omitted,
   * or null. The description string states that a missing amount means
   * one page; that behavior is not implemented in this file.
   */
  export const scrollUpActionSchema: ActionSchema = {
    name: 'scroll_up',
    description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
    schema: z.object({
      desc: z.string().optional(),
      amount: z.number().optional().nullable(),
    }),
  };
  /**
   * Definition of the `send_keys` action.
   * Fields: optional `desc` and required string `keys`. The key string is
   * not validated here beyond being a string.
   */
  export const sendKeysActionSchema: ActionSchema = {
    name: 'send_keys',
    description:
      'Send strings of special keys like Backspace, Insert, PageDown, Delete, Enter. Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard press. Be aware of different operating systems and their shortcuts',
    schema: z.object({
      desc: z.string().optional(),
      keys: z.string(),
    }),
  };
  /**
   * Definition of the `scroll_to_text` action.
   * Fields: optional `desc` and required string `text`.
   */
  export const scrollToTextActionSchema: ActionSchema = {
    name: 'scroll_to_text',
    description: 'If you dont find something which you want to interact with, scroll to it',
    schema: z.object({
      desc: z.string().optional(),
      text: z.string(),
    }),
  };
  /**
   * Definition of the `get_dropdown_options` action.
   * Its schema is an object with one required numeric field, `index`.
   */
  export const getDropdownOptionsActionSchema: ActionSchema = {
    name: 'get_dropdown_options',
    description: 'Get all options from a native dropdown',
    schema: z.object({ index: z.number() }),
  };
  /**
   * Definition of the `select_dropdown_option` action.
   * Fields: required numeric `index` and required string `text`.
   */
  export const selectDropdownOptionActionSchema: ActionSchema = {
    name: 'select_dropdown_option',
    description: 'Select dropdown option for interactive element index by the text of the option you want to select',
    schema: z.object({
      index: z.number(),
      text: z.string(),
    }),
  };