Kaynağa Gözat

add a button to stop the running task

alexchenzl 7 ay önce
ebeveyn
işleme
a1bce362b3

+ 1 - 0
.gitignore

@@ -165,6 +165,7 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+.vscode/
 
 # PyPI configuration file
 .pypirc

+ 29 - 13
extension/src/background.js

@@ -86,19 +86,35 @@ function generateTaskId() {
 
 // Message handling from sidebar
 chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
-  if (message.type === 'SEND_MESSAGE' && webSocket) {
-    const taskMessage = {
-      kind: "create",
-      data: {
-        task_id: generateTaskId(),
-        intent: message.text,
-        args: { tab_id: message.tabId }
-      }
-    };
-    webSocket.send(JSON.stringify(taskMessage));
-    sendResponse({ success: true });
-  }
-  return true;
+    if (message.type === 'SEND_MESSAGE' && webSocket) {
+        const taskId = generateTaskId();
+        const taskMessage = {
+            kind: "create",
+            data: {
+                task_id: taskId,
+                intent: message.text,
+                args: { tab_id: message.tabId }
+            }
+        };
+        webSocket.send(JSON.stringify(taskMessage));
+        sendResponse({ success: true, taskId: taskId }); // Send back the taskId
+    } else if (message.type === 'CANCEL_TASK' && webSocket) {
+        if (message.taskId) {
+            console.log('Cancelling task:', message.taskId);
+            const cancelMessage = {
+                kind: "cancel",
+                data: {
+                    task_id: message.taskId
+                }
+            };
+            webSocket.send(JSON.stringify(cancelMessage));
+            sendResponse({ success: true });
+        } else {
+            console.warn('Attempted to cancel task without taskId');
+            sendResponse({ success: false });
+        }
+    }
+    return true;
 });
 
 // Initialize WebSocket connection

+ 14 - 0
extension/src/sidebar.css

@@ -61,6 +61,20 @@ html, body {
     background: #0056b3;
 }
 
+#stop-button {
+    padding: 8px;
+    background: #dc3545;
+    color: white;
+    border: none;
+    border-radius: 4px;
+    cursor: pointer;
+    height: 4.5em;
+}
+
+#stop-button:hover {
+    background: #c82333;
+}
+
 .connection-status {
     padding: 6px;
     text-align: center;

+ 1 - 0
extension/src/sidebar.html

@@ -16,6 +16,7 @@
             <div class="input-container">
                 <textarea id="chat-input" placeholder="Type your message..."></textarea>
                 <button id="send-button">Send</button>
+                <button id="stop-button" style="display: none;">Stop</button>
             </div>
         </div>
     </div>

+ 40 - 4
extension/src/sidebar.js

@@ -1,19 +1,23 @@
 function setInputsEnabled(enabled) {
     const chatInput = document.getElementById('chat-input');
     const sendButton = document.getElementById('send-button');
+    const stopButton = document.getElementById('stop-button');
 
     chatInput.disabled = !enabled;
-    sendButton.disabled = !enabled;
     
     // Add visual styling for disabled state
     if (enabled) {
         chatInput.style.backgroundColor = '';
         chatInput.style.color = '';
-        sendButton.style.opacity = '';
+        // Show send button, hide stop button
+        sendButton.style.display = 'block';
+        stopButton.style.display = 'none';
     } else {
         chatInput.style.backgroundColor = '#f5f5f5';
         chatInput.style.color = '#999';
-        sendButton.style.opacity = '0.5';
+        // Show stop button, hide send button
+        sendButton.style.display = 'none';
+        stopButton.style.display = 'block';
     }
 }
 
@@ -45,8 +49,10 @@ document.addEventListener('DOMContentLoaded', async () => {
     }
     window.messageHistory = messageHistory;
 
+    let currentTaskId = null; // Track current task ID
+
     // Handle sending messages
-    function handleSendMessage() {
+    async function handleSendMessage() {
         const text = chatInput.value.trim();
         if (!text) return;
 
@@ -70,12 +76,17 @@ document.addEventListener('DOMContentLoaded', async () => {
                 type: 'SEND_MESSAGE',
                 text,
                 tabId
+            }).then(response => {
+                if (response?.taskId) {
+                    currentTaskId = response.taskId; // Store the task ID
+                }
             }).catch(err => {
                 addMessage({
                     actor: 'system',
                     content: 'Failed to send message',
                     timestamp: new Date()
                 });
+                setInputsEnabled(true);
             });
         });
     }
@@ -113,6 +124,31 @@ document.addEventListener('DOMContentLoaded', async () => {
     
     // Listen for input events
     chatInput.addEventListener('input', autoGrow);
+
+    // Add stop button handler
+    document.getElementById('stop-button').addEventListener('click', () => {
+        if (currentTaskId) {
+            // Send CANCEL message to service worker
+            chrome.runtime.sendMessage({
+                type: 'CANCEL_TASK',
+                taskId: currentTaskId
+            }).catch(err => {
+                addMessage({
+                    actor: 'system',
+                    content: 'Failed to cancel task',
+                    timestamp: new Date()
+                });
+            });
+            
+            currentTaskId = null;
+            setInputsEnabled(true);
+        }
+    });
+
+    // When task completes or errors, switch back to send button
+    function handleTaskComplete() {
+        currentTaskId = null;
+    }
 });
 
 // Helper function for generating fallback ID

+ 11 - 1
src/nanobrowser/lib/agent/agents/navigator.py

@@ -83,7 +83,7 @@ class NavigatorAgent(BaseAgent):
             tool_calls = []
             final_message = ''
 
-            while allowed_tool_rounds > 0: 
+            while allowed_tool_rounds > 0 and not self.context.stop_task_now: 
                 llm_with_tools = self.chatLLM.bind_tools(tools_to_use)
                 ai_response = await llm_with_tools.ainvoke(self.message_history.get_messages())    
                 self.message_history.add_message(ai_response)
@@ -94,6 +94,9 @@ class NavigatorAgent(BaseAgent):
 
                     # execute tool calls and return tool messages back to the LLM
                     for tool_call in ai_response.tool_calls:
+                        if self.context.stop_task_now:
+                            break
+
                         tool_name = tool_call["name"].lower()
                         tool_args = tool_call["args"]
                         selected_tool = self.tools[tool_name]
@@ -134,6 +137,13 @@ class NavigatorAgent(BaseAgent):
 
                 allowed_tool_rounds -= 1
 
+            if self.context.stop_task_now:
+                return AgentOutput(
+                    intent=user_input,
+                    result=None,
+                    error="Task cancelled"
+                )
+
             if allowed_tool_rounds == 0:
                 # emit event
                 self.context.error += 1

+ 8 - 1
src/nanobrowser/lib/agent/agents/planner.py

@@ -72,7 +72,7 @@ class PlannerAgent(BaseAgent):
             self.message_history.add_message(user_message)
 
             retry = 0
-            while retry < 3:
+            while retry < 3 and not self.context.stop_task_now:
                 # sometimes LLM doesn't return the structured output, so we need to retry
                 structured_llm = self.chatLLM.with_structured_output(PlannerResult, include_raw=True)
                 response: dict[str, Any] = structured_llm.invoke(self.message_history.get_messages())
@@ -83,6 +83,13 @@ class PlannerAgent(BaseAgent):
                     break
                 retry += 1
 
+            if self.context.stop_task_now:
+                return AgentOutput(
+                    intent=user_input,
+                    result=None,
+                    error="Task cancelled"
+                )
+
             result_str = result.model_dump_json(exclude_none=True)
             self.message_history.add_message(AIMessage(content=result_str))
 

+ 2 - 0
src/nanobrowser/lib/agent/context.py

@@ -42,6 +42,8 @@ class AgentContext():
     max_tool_rounds: int = DEFAULT_MAX_TOOL_ROUNDS
     # max errors allowed
     max_errors: int = DEFAULT_MAX_ERRORS
+    # stop task now as requested by user
+    stop_task_now: bool = False
 
 
 class Actors:

+ 11 - 0
src/nanobrowser/lib/agent/executor.py

@@ -106,6 +106,10 @@ class Executor:
             await self._browser_manager.close()
             self._browser_context = None
             self._browser_manager = None
+
+    def stop_task_now(self):
+        """Set a flag to stop the current task execution now"""
+        self._agent_context.stop_task_now = True
             
     async def run(self, task: str, task_id: str, max_steps: Optional[int] = 100, tab_id: Optional[str] = None):
         """
@@ -173,6 +177,12 @@ class Executor:
                     task_id=self._agent_context.task_id,
                     step=self._agent_context.step,
                 )
+
+                if self._agent_context.stop_task_now:
+                    event_data.details = "Task cancelled"
+                    await self._emit_event(ExecutionState.TASK_CANCEL, event_data)
+                    break
+
                 # check if the task has reached the maximum number of steps
                 if self._agent_context.step >= self._agent_context.max_steps:
                     event_data.details = f"Task failed with max steps reached: {self._agent_context.step}"
@@ -215,6 +225,7 @@ class Executor:
             # save chat history
             self._planner.save_chat_history()
             self._navigator.save_chat_history()
+            self._agent_context.stop_task_now = False
 
     async def _emit_event(self, state: ExecutionState, data: EventData):
         if self._agent_context:

+ 1 - 1
src/nanobrowser/lib/websocket/server.py

@@ -125,7 +125,7 @@ class WebSocketServer:
 
     async def _handle_cancel_task(self, message_data: dict, websocket: WebSocketServerProtocol):
         """Handle cancel_task message"""
-        raise NotImplementedError("Cancel task is not implemented")
+        self._executor.stop_task_now()
 
     async def _send_task_state(self, websocket: WebSocketServerProtocol, event: Event):
         """Send task state update to client"""