Browse Source

Merge pull request #10 from nanobrowser/bugfix

Bugfix
Ashu 5 months ago
parent
commit
71093f2d83

+ 33 - 0
README.md

@@ -66,6 +66,39 @@ Looking for a powerful AI web agent without the $200/month price tag of OpenAI O
     *   Add your LLM API keys.
     *   Add your LLM API keys.
     *   Choose which model to use for different agents (Navigator, Planner, Validator)
     *   Choose which model to use for different agents (Navigator, Planner, Validator)
 
 
+## 🛠️ Build from Source
+
+If you prefer to build Nanobrowser yourself, follow these steps:
+
+1. **Prerequisites**:
+   * [Node.js](https://nodejs.org/) (v22.12.0 or higher)
+   * [pnpm](https://pnpm.io/installation) (v9.15.1 or higher)
+
+2. **Clone the Repository**:
+   ```bash
+   git clone https://github.com/nanobrowser/nanobrowser.git
+   cd nanobrowser
+   ```
+
+3. **Install Dependencies**:
+   ```bash
+   pnpm install
+   ```
+
+4. **Build the Extension**:
+   ```bash
+   pnpm build
+   ```
+
+5. **Load the Extension**:
+   * The built extension will be in the `dist` directory
+   * Follow the installation steps from the Quick Start section to load the extension into your browser
+
+6. **Development Mode** (optional):
+   ```bash
+   pnpm dev
+   ```
+
 ## 💡 See It In Action
 ## 💡 See It In Action
 
 
 Here are some powerful tasks you can accomplish with just a sentence:
 Here are some powerful tasks you can accomplish with just a sentence:

+ 12 - 6
chrome-extension/src/background/agent/actions/builder.ts

@@ -156,7 +156,8 @@ export class ActionBuilder {
 
 
     // Element Interaction Actions
     // Element Interaction Actions
     const clickElement = new Action(async (input: z.infer<typeof clickElementActionSchema.schema>) => {
     const clickElement = new Action(async (input: z.infer<typeof clickElementActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || `Click element with index ${input.index}`;
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       const state = await page.getState();
       const state = await page.getState();
@@ -207,7 +208,8 @@ export class ActionBuilder {
     actions.push(clickElement);
     actions.push(clickElement);
 
 
     const inputText = new Action(async (input: z.infer<typeof inputTextActionSchema.schema>) => {
     const inputText = new Action(async (input: z.infer<typeof inputTextActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || `Input text into index ${input.index}`;
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       const state = await page.getState();
       const state = await page.getState();
@@ -284,7 +286,8 @@ export class ActionBuilder {
     actions.push(cacheContent);
     actions.push(cacheContent);
 
 
     const scrollDown = new Action(async (input: z.infer<typeof scrollDownActionSchema.schema>) => {
     const scrollDown = new Action(async (input: z.infer<typeof scrollDownActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || 'Scroll down the page';
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       await page.scrollDown(input.amount);
       await page.scrollDown(input.amount);
@@ -296,7 +299,8 @@ export class ActionBuilder {
     actions.push(scrollDown);
     actions.push(scrollDown);
 
 
     const scrollUp = new Action(async (input: z.infer<typeof scrollUpActionSchema.schema>) => {
     const scrollUp = new Action(async (input: z.infer<typeof scrollUpActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || 'Scroll up the page';
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       await page.scrollUp(input.amount);
       await page.scrollUp(input.amount);
@@ -309,7 +313,8 @@ export class ActionBuilder {
 
 
     // Keyboard Actions
     // Keyboard Actions
     const sendKeys = new Action(async (input: z.infer<typeof sendKeysActionSchema.schema>) => {
     const sendKeys = new Action(async (input: z.infer<typeof sendKeysActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || `Send keys: ${input.keys}`;
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       await page.sendKeys(input.keys);
       await page.sendKeys(input.keys);
@@ -320,7 +325,8 @@ export class ActionBuilder {
     actions.push(sendKeys);
     actions.push(sendKeys);
 
 
     const scrollToText = new Action(async (input: z.infer<typeof scrollToTextActionSchema.schema>) => {
     const scrollToText = new Action(async (input: z.infer<typeof scrollToTextActionSchema.schema>) => {
-      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, input.desc);
+      const todo = input.desc || `Scroll to text: ${input.text}`;
+      this.context.emitEvent(Actors.NAVIGATOR, ExecutionState.ACT_START, todo);
 
 
       const page = await this.context.browserContext.getCurrentPage();
       const page = await this.context.browserContext.getCurrentPage();
       try {
       try {

+ 6 - 6
chrome-extension/src/background/agent/actions/schemas.ts

@@ -41,7 +41,7 @@ export const clickElementActionSchema: ActionSchema = {
   name: 'click_element',
   name: 'click_element',
   description: 'Click element',
   description: 'Click element',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(), // some small LLMs cannot generate a description, so make it optional (it is still marked as required in the JSON schema)
     index: z.number(),
     index: z.number(),
     xpath: z.string().optional(),
     xpath: z.string().optional(),
   }),
   }),
@@ -51,7 +51,7 @@ export const inputTextActionSchema: ActionSchema = {
   name: 'input_text',
   name: 'input_text',
   description: 'Input text into an interactive input element',
   description: 'Input text into an interactive input element',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(),
     index: z.number(),
     index: z.number(),
     text: z.string(),
     text: z.string(),
     xpath: z.string().optional(),
     xpath: z.string().optional(),
@@ -98,7 +98,7 @@ export const scrollDownActionSchema: ActionSchema = {
   name: 'scroll_down',
   name: 'scroll_down',
   description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
   description: 'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(),
     amount: z.number().optional(),
     amount: z.number().optional(),
   }),
   }),
 };
 };
@@ -107,7 +107,7 @@ export const scrollUpActionSchema: ActionSchema = {
   name: 'scroll_up',
   name: 'scroll_up',
   description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
   description: 'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(),
     amount: z.number().optional(),
     amount: z.number().optional(),
   }),
   }),
 };
 };
@@ -117,7 +117,7 @@ export const sendKeysActionSchema: ActionSchema = {
   description:
   description:
     'Send strings of special keys like Backspace, Insert, PageDown, Delete, Enter. Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard press. Be aware of different operating systems and their shortcuts',
     'Send strings of special keys like Backspace, Insert, PageDown, Delete, Enter. Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard press. Be aware of different operating systems and their shortcuts',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(),
     keys: z.string(),
     keys: z.string(),
   }),
   }),
 };
 };
@@ -126,7 +126,7 @@ export const scrollToTextActionSchema: ActionSchema = {
   name: 'scroll_to_text',
   name: 'scroll_to_text',
   description: 'If you dont find something which you want to interact with, scroll to it',
   description: 'If you dont find something which you want to interact with, scroll to it',
   schema: z.object({
   schema: z.object({
-    desc: z.string(),
+    desc: z.string().optional(),
     text: z.string(),
     text: z.string(),
   }),
   }),
 };
 };

+ 4 - 4
chrome-extension/src/background/agent/prompts/navigator.ts

@@ -34,12 +34,12 @@ export class NavigatorPrompt extends BasePrompt {
 
 
    Common action sequences:
    Common action sequences:
    - Form filling: [
    - Form filling: [
-       {"input_text": {"index": 1, "text": "username"}},
-       {"input_text": {"index": 2, "text": "password"}},
-       {"click_element": {"index": 3}}
+       {"input_text": {"desc": "Fill title", "index": 1, "text": "example title"}},
+       {"input_text": {"desc": "Fill comment", "index": 2, "text": "example comment"}},
+       {"click_element": {"desc": "Click submit button", "index": 3}}
      ]
      ]
    - Navigation: [
    - Navigation: [
-       {"open_tab": {}},
+       {"open_tab": {"url": "https://example.com"}},
        {"go_to_url": {"url": "https://example.com"}},
        {"go_to_url": {"url": "https://example.com"}},
      ]
      ]
 
 

chrome-extension/public/content.css → pages/content/public/_content.css


+ 0 - 0
pages/options/public/_options.css


+ 2 - 2
pages/options/src/components/ModelSettings.tsx

@@ -75,8 +75,8 @@ export const ModelSettings = () => {
     setApiKeys(prev => ({
     setApiKeys(prev => ({
       ...prev,
       ...prev,
       [provider]: {
       [provider]: {
-        apiKey,
-        baseUrl: baseUrl !== undefined ? baseUrl : prev[provider]?.baseUrl,
+        apiKey: apiKey.trim(),
+        baseUrl: baseUrl !== undefined ? baseUrl.trim() : prev[provider]?.baseUrl,
       },
       },
     }));
     }));
   };
   };