json_schema.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
// This is the JSON schema exported from browser-use v0.1.41, with minor changes:
// - change page_id to tab_id
// - add intent to some actions, which is used to describe the action's purpose
// - remove the extract_content action, because it usually submits very long content to the LLM
// - remove DragDropAction, it's not supported yet
// - remove the save_pdf action, it's not supported yet
// - remove Position, not needed
// - remove NoParamsAction, not needed
// TODO: zod does not generate the same schema for reasons not yet understood; this needs to be fixed
  10. export const jsonNavigatorOutputSchema = {
  11. $defs: {
  12. ActionModel: {
  13. properties: {
  14. done: {
  15. anyOf: [
  16. {
  17. $ref: '#/$defs/DoneAction',
  18. },
  19. {
  20. type: 'null',
  21. },
  22. ],
  23. description: 'Complete task',
  24. },
  25. search_google: {
  26. anyOf: [
  27. {
  28. $ref: '#/$defs/SearchGoogleAction',
  29. },
  30. {
  31. type: 'null',
  32. },
  33. ],
  34. description:
  35. 'Search the query in Google in the current tab, the query should be a search query like humans search in Google, concrete and not vague or super long. More the single most important items. ',
  36. },
  37. go_to_url: {
  38. anyOf: [
  39. {
  40. $ref: '#/$defs/GoToUrlAction',
  41. },
  42. {
  43. type: 'null',
  44. },
  45. ],
  46. description: 'Navigate to URL in the current tab',
  47. },
  48. go_back: {
  49. anyOf: [
  50. {
  51. $ref: '#/$defs/GoBackAction',
  52. },
  53. {
  54. type: 'null',
  55. },
  56. ],
  57. description: 'Go back to previous page',
  58. },
  59. // wait: {
  60. // anyOf: [
  61. // {
  62. // $ref: '#/$defs/WaitAction',
  63. // },
  64. // {
  65. // type: 'null',
  66. // },
  67. // ],
  68. // description: 'Wait for x seconds default 3',
  69. // },
  70. click_element: {
  71. anyOf: [
  72. {
  73. $ref: '#/$defs/ClickElementAction',
  74. },
  75. {
  76. type: 'null',
  77. },
  78. ],
  79. description: 'Click element by index',
  80. },
  81. input_text: {
  82. anyOf: [
  83. {
  84. $ref: '#/$defs/InputTextAction',
  85. },
  86. {
  87. type: 'null',
  88. },
  89. ],
  90. description: 'Input text into an interactive input element',
  91. },
  92. switch_tab: {
  93. anyOf: [
  94. {
  95. $ref: '#/$defs/SwitchTabAction',
  96. },
  97. {
  98. type: 'null',
  99. },
  100. ],
  101. description: 'Switch tab',
  102. },
  103. open_tab: {
  104. anyOf: [
  105. {
  106. $ref: '#/$defs/OpenTabAction',
  107. },
  108. {
  109. type: 'null',
  110. },
  111. ],
  112. description: 'Open url in new tab',
  113. },
  114. close_tab: {
  115. anyOf: [
  116. {
  117. $ref: '#/$defs/CloseTabAction',
  118. },
  119. {
  120. type: 'null',
  121. },
  122. ],
  123. description: 'Close tab by tab_id',
  124. },
  125. cache_content: {
  126. anyOf: [
  127. {
  128. $ref: '#/$defs/cache_content_parameters',
  129. },
  130. {
  131. type: 'null',
  132. },
  133. ],
  134. description: 'Cache what you have found so far from the current page for future use',
  135. },
  136. scroll_down: {
  137. anyOf: [
  138. {
  139. $ref: '#/$defs/ScrollAction',
  140. },
  141. {
  142. type: 'null',
  143. },
  144. ],
  145. description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
  146. },
  147. scroll_up: {
  148. anyOf: [
  149. {
  150. $ref: '#/$defs/ScrollAction',
  151. },
  152. {
  153. type: 'null',
  154. },
  155. ],
  156. description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
  157. },
  158. send_keys: {
  159. anyOf: [
  160. {
  161. $ref: '#/$defs/SendKeysAction',
  162. },
  163. {
  164. type: 'null',
  165. },
  166. ],
  167. description:
  168. 'Send strings of special keys like Escape, Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press.',
  169. },
  170. scroll_to_text: {
  171. anyOf: [
  172. {
  173. $ref: '#/$defs/scroll_to_text_parameters',
  174. },
  175. {
  176. type: 'null',
  177. },
  178. ],
  179. description: 'If you dont find something which you want to interact with, scroll to it',
  180. },
  181. get_dropdown_options: {
  182. anyOf: [
  183. {
  184. $ref: '#/$defs/get_dropdown_options_parameters',
  185. },
  186. {
  187. type: 'null',
  188. },
  189. ],
  190. description: 'Get all options from a native dropdown',
  191. },
  192. select_dropdown_option: {
  193. anyOf: [
  194. {
  195. $ref: '#/$defs/select_dropdown_option_parameters',
  196. },
  197. {
  198. type: 'null',
  199. },
  200. ],
  201. description:
  202. 'Select dropdown option for interactive element index by the text of the option you want to select',
  203. },
  204. },
  205. title: 'ActionModel',
  206. type: 'object',
  207. },
  208. AgentBrain: {
  209. description: 'Current state of the agent',
  210. properties: {
  211. evaluation_previous_goal: {
  212. title: 'Evaluation of previous goal',
  213. type: 'string',
  214. },
  215. memory: {
  216. title: 'Memory',
  217. type: 'string',
  218. },
  219. next_goal: {
  220. title: 'Next Goal',
  221. type: 'string',
  222. },
  223. },
  224. required: ['evaluation_previous_goal', 'memory', 'next_goal'],
  225. title: 'AgentBrain',
  226. type: 'object',
  227. },
  228. ClickElementAction: {
  229. properties: {
  230. intent: {
  231. title: 'Intent',
  232. type: 'string',
  233. description: 'purpose of this action',
  234. },
  235. index: {
  236. title: 'Index',
  237. type: 'integer',
  238. },
  239. xpath: {
  240. anyOf: [
  241. {
  242. type: 'string',
  243. },
  244. {
  245. type: 'null',
  246. },
  247. ],
  248. title: 'Xpath',
  249. },
  250. },
  251. required: ['intent', 'index'],
  252. title: 'ClickElementAction',
  253. type: 'object',
  254. },
  255. CloseTabAction: {
  256. properties: {
  257. intent: {
  258. title: 'Intent',
  259. type: 'string',
  260. description: 'purpose of this action',
  261. },
  262. tab_id: {
  263. title: 'Tab Id',
  264. type: 'integer',
  265. },
  266. },
  267. required: ['intent', 'tab_id'],
  268. title: 'CloseTabAction',
  269. type: 'object',
  270. },
  271. DoneAction: {
  272. properties: {
  273. text: {
  274. title: 'Text',
  275. type: 'string',
  276. },
  277. success: {
  278. title: 'Success',
  279. type: 'boolean',
  280. },
  281. },
  282. required: ['text', 'success'],
  283. title: 'DoneAction',
  284. type: 'object',
  285. },
  286. GoToUrlAction: {
  287. properties: {
  288. intent: {
  289. title: 'Intent',
  290. type: 'string',
  291. description: 'purpose of this action',
  292. },
  293. url: {
  294. title: 'Url',
  295. type: 'string',
  296. },
  297. },
  298. required: ['intent', 'url'],
  299. title: 'GoToUrlAction',
  300. type: 'object',
  301. },
  302. GoBackAction: {
  303. properties: {
  304. intent: {
  305. title: 'Intent',
  306. type: 'string',
  307. description: 'purpose of this action',
  308. },
  309. },
  310. required: ['intent'],
  311. title: 'GoBackAction',
  312. type: 'object',
  313. },
  314. InputTextAction: {
  315. properties: {
  316. intent: {
  317. title: 'Intent',
  318. type: 'string',
  319. description: 'purpose of this action',
  320. },
  321. index: {
  322. title: 'Index',
  323. type: 'integer',
  324. },
  325. text: {
  326. title: 'Text',
  327. type: 'string',
  328. },
  329. xpath: {
  330. anyOf: [
  331. {
  332. type: 'string',
  333. },
  334. {
  335. type: 'null',
  336. },
  337. ],
  338. title: 'Xpath',
  339. },
  340. },
  341. required: ['intent', 'index', 'text'],
  342. title: 'InputTextAction',
  343. type: 'object',
  344. },
  345. OpenTabAction: {
  346. properties: {
  347. intent: {
  348. title: 'Intent',
  349. type: 'string',
  350. description: 'purpose of this action',
  351. },
  352. url: {
  353. title: 'Url',
  354. type: 'string',
  355. },
  356. },
  357. required: ['intent', 'url'],
  358. title: 'OpenTabAction',
  359. type: 'object',
  360. },
  361. ScrollAction: {
  362. properties: {
  363. intent: {
  364. title: 'Intent',
  365. type: 'string',
  366. description: 'purpose of this action',
  367. },
  368. amount: {
  369. anyOf: [
  370. {
  371. type: 'integer',
  372. },
  373. {
  374. type: 'null',
  375. },
  376. ],
  377. title: 'Amount',
  378. },
  379. },
  380. required: ['intent', 'amount'],
  381. title: 'ScrollAction',
  382. type: 'object',
  383. },
  384. SearchGoogleAction: {
  385. properties: {
  386. intent: {
  387. title: 'Intent',
  388. type: 'string',
  389. description: 'purpose of this action',
  390. },
  391. query: {
  392. title: 'Query',
  393. type: 'string',
  394. },
  395. },
  396. required: ['intent', 'query'],
  397. title: 'SearchGoogleAction',
  398. type: 'object',
  399. },
  400. SendKeysAction: {
  401. properties: {
  402. intent: {
  403. title: 'Intent',
  404. type: 'string',
  405. description: 'purpose of this action',
  406. },
  407. keys: {
  408. title: 'Keys',
  409. type: 'string',
  410. },
  411. },
  412. required: ['intent', 'keys'],
  413. title: 'SendKeysAction',
  414. type: 'object',
  415. },
  416. SwitchTabAction: {
  417. properties: {
  418. intent: {
  419. title: 'Intent',
  420. type: 'string',
  421. description: 'purpose of this action',
  422. },
  423. tab_id: {
  424. title: 'Tab Id',
  425. type: 'integer',
  426. },
  427. },
  428. required: ['intent', 'tab_id'],
  429. title: 'SwitchTabAction',
  430. type: 'object',
  431. },
  432. cache_content_parameters: {
  433. properties: {
  434. intent: {
  435. title: 'Intent',
  436. type: 'string',
  437. description: 'purpose of this action',
  438. },
  439. content: {
  440. title: 'Content',
  441. type: 'string',
  442. },
  443. },
  444. required: ['intent', 'content'],
  445. title: 'cache_content_parameters',
  446. type: 'object',
  447. },
  448. get_dropdown_options_parameters: {
  449. properties: {
  450. intent: {
  451. title: 'Intent',
  452. type: 'string',
  453. description: 'purpose of this action',
  454. },
  455. index: {
  456. title: 'Index',
  457. type: 'integer',
  458. },
  459. },
  460. required: ['intent', 'index'],
  461. title: 'get_dropdown_options_parameters',
  462. type: 'object',
  463. },
  464. scroll_to_text_parameters: {
  465. properties: {
  466. intent: {
  467. title: 'Intent',
  468. type: 'string',
  469. description: 'purpose of this action',
  470. },
  471. text: {
  472. title: 'Text',
  473. type: 'string',
  474. },
  475. },
  476. required: ['intent', 'text'],
  477. title: 'scroll_to_text_parameters',
  478. type: 'object',
  479. },
  480. select_dropdown_option_parameters: {
  481. properties: {
  482. intent: {
  483. title: 'Intent',
  484. type: 'string',
  485. description: 'purpose of this action',
  486. },
  487. index: {
  488. title: 'Index',
  489. type: 'integer',
  490. },
  491. text: {
  492. title: 'Text',
  493. type: 'string',
  494. },
  495. },
  496. required: ['intent', 'index', 'text'],
  497. title: 'select_dropdown_option_parameters',
  498. type: 'object',
  499. },
  500. WaitAction: {
  501. properties: {
  502. intent: {
  503. title: 'Intent',
  504. type: 'string',
  505. description: 'purpose of this action',
  506. },
  507. seconds: {
  508. title: 'Seconds',
  509. type: 'integer',
  510. default: 3,
  511. },
  512. },
  513. required: ['intent', 'seconds'],
  514. title: 'WaitAction',
  515. type: 'object',
  516. },
  517. },
  518. properties: {
  519. current_state: {
  520. $ref: '#/$defs/AgentBrain',
  521. },
  522. action: {
  523. items: {
  524. $ref: '#/$defs/ActionModel',
  525. },
  526. title: 'Action',
  527. type: 'array',
  528. },
  529. },
  530. required: ['current_state', 'action'],
  531. title: 'AgentOutput',
  532. type: 'object',
  533. };