|
@@ -0,0 +1,530 @@
|
|
|
+import json
|
|
|
+import os
|
|
|
+import re
|
|
|
+import logging
|
|
|
+import traceback
|
|
|
+from typing import Annotated, Optional, Any
|
|
|
+from pathlib import Path
|
|
|
+from playwright.async_api import Page
|
|
|
+from ..manager import PlaywrightManager, PlaywrightOptions
|
|
|
+
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
+
|
|
|
+space_delimited_mmid = re.compile(r'^[\d ]+$')
|
|
|
+
|
|
|
+def is_space_delimited_mmid(s: str) -> bool:
|
|
|
+ """
|
|
|
+ Check if the given string matches the the mmid pattern of number space repeated.
|
|
|
+
|
|
|
+ Parameters:
|
|
|
+ - s (str): The string to check against the pattern.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ - bool: True if the string matches the pattern, False otherwise.
|
|
|
+ """
|
|
|
+ # Use fullmatch() to ensure the entire string matches the pattern
|
|
|
+ return bool(space_delimited_mmid.fullmatch(s))
|
|
|
+
|
|
|
+
|
|
|
+async def __inject_attributes(page: Page):
|
|
|
+ """
|
|
|
+ Injects 'mmid' and 'aria-keyshortcuts' into all DOM elements. If an element already has an 'aria-keyshortcuts',
|
|
|
+ it renames it to 'orig-aria-keyshortcuts' before injecting the new 'aria-keyshortcuts'
|
|
|
+ This will be captured in the accessibility tree and thus make it easier to reconcile the tree with the DOM.
|
|
|
+ 'aria-keyshortcuts' is choosen because it is not widely used aria attribute.
|
|
|
+ """
|
|
|
+
|
|
|
+ last_mmid = await page.evaluate("""() => {
|
|
|
+ const allElements = document.querySelectorAll('*');
|
|
|
+ let id = 0;
|
|
|
+ allElements.forEach(element => {
|
|
|
+ const origAriaAttribute = element.getAttribute('aria-keyshortcuts');
|
|
|
+ const mmid = `${++id}`;
|
|
|
+ element.setAttribute('mmid', mmid);
|
|
|
+ element.setAttribute('aria-keyshortcuts', mmid);
|
|
|
+ //console.log(`Injected 'mmid'into element with tag: ${element.tagName} and mmid: ${mmid}`);
|
|
|
+ if (origAriaAttribute) {
|
|
|
+ element.setAttribute('orig-aria-keyshortcuts', origAriaAttribute);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ return id;
|
|
|
+ }""")
|
|
|
+ logger.debug(f"Added MMID into {last_mmid} elements")
|
|
|
+
|
|
|
+
|
|
|
+async def __fetch_dom_info(page: Page, accessibility_tree: dict[str, Any], only_input_fields: bool):
|
|
|
+ """
|
|
|
+ Iterates over the accessibility tree, fetching additional information from the DOM based on 'mmid',
|
|
|
+ and constructs a new JSON structure with detailed information.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ page (Page): The page object representing the web page.
|
|
|
+ accessibility_tree (dict[str, Any]): The accessibility tree JSON structure.
|
|
|
+ only_input_fields (bool): Flag indicating whether to include only input fields in the new JSON structure.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ dict[str, Any]: The pruned tree with detailed information from the DOM.
|
|
|
+ """
|
|
|
+
|
|
|
+ logger.debug("Reconciling the Accessibility Tree with the DOM")
|
|
|
+ # Define the attributes to fetch for each element
|
|
|
+ attributes = ['name', 'aria-label', 'placeholder', 'mmid', "id", "for", "data-testid"]
|
|
|
+ backup_attributes = [] #if the attributes are not found, then try to get these attributes
|
|
|
+ tags_to_ignore = ['head','style', 'script', 'link', 'meta', 'noscript', 'template', 'iframe', 'g', 'main', 'c-wiz','svg', 'path']
|
|
|
+ attributes_to_delete = ["level", "multiline", "haspopup", "id", "for"]
|
|
|
+ ids_to_ignore = ['agentDriveAutoOverlay']
|
|
|
+
|
|
|
+ # Recursive function to process each node in the accessibility tree
|
|
|
+ async def process_node(node: dict[str, Any]):
|
|
|
+ if 'children' in node:
|
|
|
+ for child in node['children']:
|
|
|
+ await process_node(child)
|
|
|
+
|
|
|
+ # Use 'name' attribute from the accessibility node as 'mmid'
|
|
|
+ mmid_temp: str = node.get('keyshortcuts') # type: ignore
|
|
|
+
|
|
|
+ # If the name has multiple mmids, take the last one
|
|
|
+ if(mmid_temp and is_space_delimited_mmid(mmid_temp)):
|
|
|
+ #TODO: consider if we should grab each of the mmids and process them separately as seperate nodes copying this node's attributes
|
|
|
+ mmid_temp = mmid_temp.split(' ')[-1]
|
|
|
+
|
|
|
+ #focusing on nodes with mmid, which is the attribute we inject
|
|
|
+ try:
|
|
|
+ mmid = int(mmid_temp)
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ #logger.error(f"'name attribute contains \"{node.get('name')}\", which is not a valid numeric mmid. Adding node as is: {node}")
|
|
|
+ return node.get('name')
|
|
|
+
|
|
|
+ if node['role'] == 'menuitem':
|
|
|
+ return node.get('name')
|
|
|
+
|
|
|
+ if node.get('role') == 'dialog' and node.get('modal') == True: # noqa: E712
|
|
|
+ node["important information"] = "This is a modal dialog. Please interact with this dialog and close it to be able to interact with the full page (e.g. by pressing the close button or selecting an option)."
|
|
|
+
|
|
|
+ if mmid:
|
|
|
+ # Determine if we need to fetch 'innerText' based on the absence of 'children' in the accessibility node
|
|
|
+ should_fetch_inner_text = 'children' not in node
|
|
|
+
|
|
|
+ js_code = """
|
|
|
+ (input_params) => {
|
|
|
+ const should_fetch_inner_text = input_params.should_fetch_inner_text;
|
|
|
+ const mmid = input_params.mmid;
|
|
|
+ const attributes = input_params.attributes;
|
|
|
+ const tags_to_ignore = input_params.tags_to_ignore;
|
|
|
+ const ids_to_ignore = input_params.ids_to_ignore;
|
|
|
+
|
|
|
+ const element = document.querySelector(`[mmid="${mmid}"]`);
|
|
|
+
|
|
|
+ if (!element) {
|
|
|
+ console.log(`No element found with mmid: ${mmid}`);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ids_to_ignore.includes(element.id)) {
|
|
|
+ console.log(`Ignoring element with id: ${element.id}`, element);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ //Ignore "option" because it would have been processed with the select element
|
|
|
+ if (tags_to_ignore.includes(element.tagName.toLowerCase()) || element.tagName.toLowerCase() === "option") return null;
|
|
|
+
|
|
|
+ let attributes_to_values = {
|
|
|
+ 'tag': element.tagName.toLowerCase() // Always include the tag name
|
|
|
+ };
|
|
|
+
|
|
|
+ // If the element is an input, include its type as well
|
|
|
+ if (element.tagName.toLowerCase() === 'input') {
|
|
|
+ attributes_to_values['tag_type'] = element.type; // This will capture 'checkbox', 'radio', etc.
|
|
|
+ }
|
|
|
+ else if (element.tagName.toLowerCase() === 'select') {
|
|
|
+ attributes_to_values["mmid"] = element.getAttribute('mmid');
|
|
|
+ attributes_to_values["role"] = "combobox";
|
|
|
+ attributes_to_values["options"] = [];
|
|
|
+
|
|
|
+ for (const option of element.options) {
|
|
|
+ let option_attributes_to_values = {
|
|
|
+ "mmid": option.getAttribute('mmid'),
|
|
|
+ "text": option.text,
|
|
|
+ "value": option.value,
|
|
|
+ "selected": option.selected
|
|
|
+ };
|
|
|
+ attributes_to_values["options"].push(option_attributes_to_values);
|
|
|
+ }
|
|
|
+ return attributes_to_values;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const attribute of attributes) {
|
|
|
+ let value = element.getAttribute(attribute);
|
|
|
+
|
|
|
+ if(value){
|
|
|
+ /*
|
|
|
+ if(attribute === 'href'){
|
|
|
+ value = value.split('?')[0]
|
|
|
+ }
|
|
|
+ */
|
|
|
+ attributes_to_values[attribute] = value;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (should_fetch_inner_text && element.innerText) {
|
|
|
+ attributes_to_values['description'] = element.innerText;
|
|
|
+ }
|
|
|
+
|
|
|
+ let role = element.getAttribute('role');
|
|
|
+ if(role==='listbox' || element.tagName.toLowerCase()=== 'ul'){
|
|
|
+ let children=element.children;
|
|
|
+ let filtered_children = Array.from(children).filter(child => child.getAttribute('role') === 'option');
|
|
|
+ console.log("Listbox or ul found: ", filtered_children);
|
|
|
+ let attributes_to_include = ['mmid', 'role', 'aria-label','value'];
|
|
|
+ attributes_to_values["additional_info"]=[]
|
|
|
+ for (const child of children) {
|
|
|
+ let children_attributes_to_values = {};
|
|
|
+
|
|
|
+ for (let attr of child.attributes) {
|
|
|
+ // If the attribute is not in the predefined list, add it to children_attributes_to_values
|
|
|
+ if (attributes_to_include.includes(attr.name)) {
|
|
|
+ children_attributes_to_values[attr.name] = attr.value;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ attributes_to_values["additional_info"].push(children_attributes_to_values);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // Check if attributes_to_values contains more than just 'name', 'role', and 'mmid'
|
|
|
+ const keys = Object.keys(attributes_to_values);
|
|
|
+ const minimalKeys = ['tag', 'mmid'];
|
|
|
+ const hasMoreThanMinimalKeys = keys.length > minimalKeys.length || keys.some(key => !minimalKeys.includes(key));
|
|
|
+
|
|
|
+ if (!hasMoreThanMinimalKeys) {
|
|
|
+ //If there were no attributes found, then try to get the backup attributes
|
|
|
+ for (const backupAttribute of input_params.backup_attributes) {
|
|
|
+ let value = element.getAttribute(backupAttribute);
|
|
|
+ if(value){
|
|
|
+ attributes_to_values[backupAttribute] = value;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //if even the backup attributes are not found, then return null, which will cause this element to be skipped
|
|
|
+ if(Object.keys(attributes_to_values).length <= minimalKeys.length) {
|
|
|
+ if (element.tagName.toLowerCase() === 'button') {
|
|
|
+ attributes_to_values["mmid"] = element.getAttribute('mmid');
|
|
|
+ attributes_to_values["role"] = "button";
|
|
|
+ attributes_to_values["additional_info"] = [];
|
|
|
+ let children=element.children;
|
|
|
+ let attributes_to_exclude = ['width', 'height', 'path', 'class', 'viewBox', 'mmid']
|
|
|
+
|
|
|
+ // Check if the button has no text and no attributes
|
|
|
+ if (element.innerText.trim() === '') {
|
|
|
+
|
|
|
+ for (const child of children) {
|
|
|
+ let children_attributes_to_values = {};
|
|
|
+
|
|
|
+ for (let attr of child.attributes) {
|
|
|
+ // If the attribute is not in the predefined list, add it to children_attributes_to_values
|
|
|
+ if (!attributes_to_exclude.includes(attr.name)) {
|
|
|
+ children_attributes_to_values[attr.name] = attr.value;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ attributes_to_values["additional_info"].push(children_attributes_to_values);
|
|
|
+ }
|
|
|
+ console.log("Button with no text and no attributes: ", attributes_to_values);
|
|
|
+ return attributes_to_values;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return null; // Return null if only minimal keys are present
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return attributes_to_values;
|
|
|
+ }
|
|
|
+ """
|
|
|
+
|
|
|
+ # Fetch attributes and possibly 'innerText' from the DOM element by 'mmid'
|
|
|
+ element_attributes = await page.evaluate(js_code,
|
|
|
+ {"mmid": mmid, "attributes": attributes, "backup_attributes": backup_attributes,
|
|
|
+ "should_fetch_inner_text": should_fetch_inner_text,
|
|
|
+ "tags_to_ignore": tags_to_ignore,
|
|
|
+ "ids_to_ignore": ids_to_ignore})
|
|
|
+
|
|
|
+ if 'keyshortcuts' in node:
|
|
|
+ del node['keyshortcuts'] #remove keyshortcuts since it is not needed
|
|
|
+
|
|
|
+ node["mmid"]=mmid
|
|
|
+
|
|
|
+ # Update the node with fetched information
|
|
|
+ if element_attributes:
|
|
|
+ node.update(element_attributes)
|
|
|
+
|
|
|
+ # check if 'name' and 'mmid' are the same
|
|
|
+ if node.get('name') == node.get('mmid') and node.get('role') != "textbox":
|
|
|
+ del node['name'] # Remove 'name' from the node
|
|
|
+
|
|
|
+ if 'name' in node and 'description' in node and (node['name'] == node['description'] or node['name'] == node['description'].replace('\n', ' ') or node['description'].replace('\n', '') in node['name']):
|
|
|
+ del node['description'] #if the name is same as description, then remove the description to avoid duplication
|
|
|
+
|
|
|
+ if 'name' in node and 'aria-label' in node and node['aria-label'] in node['name']:
|
|
|
+ del node['aria-label'] #if the name is same as the aria-label, then remove the aria-label to avoid duplication
|
|
|
+
|
|
|
+ if 'name' in node and 'text' in node and node['name'] == node['text']:
|
|
|
+ del node['text'] #if the name is same as the text, then remove the text to avoid duplication
|
|
|
+
|
|
|
+ if node.get('tag') == "select": #children are not needed for select menus since "options" attriburte is already added
|
|
|
+ node.pop("children", None)
|
|
|
+ node.pop("role", None)
|
|
|
+ node.pop("description", None)
|
|
|
+
|
|
|
+ #role and tag can have the same info. Get rid of role if it is the same as tag
|
|
|
+ if node.get('role') == node.get('tag'):
|
|
|
+ del node['role']
|
|
|
+
|
|
|
+ # avoid duplicate aria-label
|
|
|
+ if node.get("aria-label") and node.get("placeholder") and node.get("aria-label") == node.get("placeholder"):
|
|
|
+ del node["aria-label"]
|
|
|
+
|
|
|
+ if node.get("role") == "link":
|
|
|
+ del node["role"]
|
|
|
+ if node.get("description"):
|
|
|
+ node["text"] = node["description"]
|
|
|
+ del node["description"]
|
|
|
+
|
|
|
+ #textbox just means a text input and that is expressed well enough with the rest of the attributes returned
|
|
|
+ #if node.get('role') == "textbox":
|
|
|
+ # del node['role']
|
|
|
+
|
|
|
+ if node.get('role') == "textbox":
|
|
|
+ #get the id attribute of this field from the DOM
|
|
|
+ if "id" in element_attributes and element_attributes["id"]:
|
|
|
+ #find if there is an element in the DOM that has this id in aria-labelledby.
|
|
|
+ js_code = """
|
|
|
+ (inputParams) => {
|
|
|
+ let referencingElements = [];
|
|
|
+ const referencedElement = document.querySelector(`[aria-labelledby="${inputParams.aria_labelled_by_query_value}"]`);
|
|
|
+ if(referencedElement) {
|
|
|
+ const mmid = referencedElement.getAttribute('mmid');
|
|
|
+ if (mmid) {
|
|
|
+ return {"mmid": mmid, "tag": referencedElement.tagName.toLowerCase()};
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ """
|
|
|
+ #textbox just means a text input and that is expressed well enough with the rest of the attributes returned
|
|
|
+ #del node['role']
|
|
|
+
|
|
|
+ #remove attributes that are not needed once processing of a node is complete
|
|
|
+ for attribute_to_delete in attributes_to_delete:
|
|
|
+ if attribute_to_delete in node:
|
|
|
+ node.pop(attribute_to_delete, None)
|
|
|
+ else:
|
|
|
+ logger.debug(f"No element found with mmid: {mmid}, deleting node: {node}")
|
|
|
+ node["marked_for_deletion_by_mm"] = True
|
|
|
+
|
|
|
+
|
|
|
+ # Process each node in the tree starting from the root
|
|
|
+ await process_node(accessibility_tree)
|
|
|
+
|
|
|
+ pruned_tree = __prune_tree(accessibility_tree, only_input_fields)
|
|
|
+
|
|
|
+ logger.debug("Reconciliation complete")
|
|
|
+ return pruned_tree
|
|
|
+
|
|
|
+
|
|
|
+async def __cleanup_dom(page: Page):
|
|
|
+ """
|
|
|
+ Cleans up the DOM by removing injected 'aria-description' attributes and restoring any original 'aria-keyshortcuts'
|
|
|
+ from 'orig-aria-keyshortcuts'.
|
|
|
+ """
|
|
|
+ logger.debug("Cleaning up the DOM's previous injections")
|
|
|
+ await page.evaluate("""() => {
|
|
|
+ const allElements = document.querySelectorAll('*[mmid]');
|
|
|
+ allElements.forEach(element => {
|
|
|
+ element.removeAttribute('aria-keyshortcuts');
|
|
|
+ const origAriaLabel = element.getAttribute('orig-aria-keyshortcuts');
|
|
|
+ if (origAriaLabel) {
|
|
|
+ element.setAttribute('aria-keyshortcuts', origAriaLabel);
|
|
|
+ element.removeAttribute('orig-aria-keyshortcuts');
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }""")
|
|
|
+ logger.debug("DOM cleanup complete")
|
|
|
+
|
|
|
+
|
|
|
+def __prune_tree(node: dict[str, Any], only_input_fields: bool) -> dict[str, Any] | None:
|
|
|
+ """
|
|
|
+ Recursively prunes a tree starting from `node`, based on pruning conditions and handling of 'unraveling'.
|
|
|
+
|
|
|
+ The function has two main jobs:
|
|
|
+ 1. Pruning: Remove nodes that don't meet certain conditions, like being marked for deletion.
|
|
|
+ 2. Unraveling: For nodes marked with 'marked_for_unravel_children', we replace them with their children,
|
|
|
+ effectively removing the node and lifting its children up a level in the tree.
|
|
|
+
|
|
|
+ This happens in place, meaning we modify the tree as we go, which is efficient but means you should
|
|
|
+ be cautious about modifying the tree outside this function during a prune operation.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ - node (Dict[str, Any]): The node we're currently looking at. We'll check this node, its children,
|
|
|
+ and so on, recursively down the tree.
|
|
|
+ - only_input_fields (bool): If True, we're only interested in pruning input-related nodes (like form fields).
|
|
|
+ This lets you narrow the focus if, for example, you're only interested in cleaning up form-related parts
|
|
|
+ of a larger tree.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ - dict[str, Any] | None: The pruned version of `node`, or None if `node` was pruned away. When we 'unravel'
|
|
|
+ a node, we directly replace it with its children in the parent's list of children, so the return value
|
|
|
+ will be the parent, updated in place.
|
|
|
+
|
|
|
+ Notes:
|
|
|
+ - 'marked_for_deletion_by_mm' is our flag for nodes that should definitely be removed.
|
|
|
+ - Unraveling is neat for flattening the tree when a node is just a wrapper without semantic meaning.
|
|
|
+ - We use a while loop with manual index management to safely modify the list of children as we iterate over it.
|
|
|
+ """
|
|
|
+ if "marked_for_deletion_by_mm" in node:
|
|
|
+ return None
|
|
|
+
|
|
|
+ if 'children' in node:
|
|
|
+ i = 0
|
|
|
+ while i < len(node['children']):
|
|
|
+ child = node['children'][i]
|
|
|
+ if 'marked_for_unravel_children' in child:
|
|
|
+ # Replace the current child with its children
|
|
|
+ if 'children' in child:
|
|
|
+ node['children'] = node['children'][:i] + child['children'] + node['children'][i+1:]
|
|
|
+ i += len(child['children']) - 1 # Adjust the index for the new children
|
|
|
+ else:
|
|
|
+ # If the node marked for unraveling has no children, remove it
|
|
|
+ node['children'].pop(i)
|
|
|
+ i -= 1 # Adjust the index since we removed an element
|
|
|
+ else:
|
|
|
+ # Recursively prune the child if it's not marked for unraveling
|
|
|
+ pruned_child = __prune_tree(child, only_input_fields)
|
|
|
+ if pruned_child is None:
|
|
|
+ # If the child is pruned, remove it from the children list
|
|
|
+ node['children'].pop(i)
|
|
|
+ i -= 1 # Adjust the index since we removed an element
|
|
|
+ else:
|
|
|
+ # Update the child with the pruned version
|
|
|
+ node['children'][i] = pruned_child
|
|
|
+ i += 1 # Move to the next child
|
|
|
+
|
|
|
+ # After processing all children, if the children array is empty, remove it
|
|
|
+ if not node['children']:
|
|
|
+ del node['children']
|
|
|
+
|
|
|
+ # Apply existing conditions to decide if the current node should be pruned
|
|
|
+ return None if __should_prune_node(node, only_input_fields) else node
|
|
|
+
|
|
|
+
|
|
|
+def __should_prune_node(node: dict[str, Any], only_input_fields: bool):
|
|
|
+ """
|
|
|
+ Determines if a node should be pruned based on its 'role' and 'element_attributes'.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ node (dict[str, Any]): The node to be evaluated.
|
|
|
+ only_input_fields (bool): Flag indicating whether only input fields should be considered.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ bool: True if the node should be pruned, False otherwise.
|
|
|
+ """
|
|
|
+ #If the request is for only input fields and this is not an input field, then mark the node for prunning
|
|
|
+ if node.get("role") != "WebArea" and only_input_fields and not (node.get("tag") in ("input", "button", "textarea") or node.get("role") == "button"):
|
|
|
+ return True
|
|
|
+
|
|
|
+ if node.get('role') == 'generic' and 'children' not in node and not ('name' in node and node.get('name')): # The presence of 'children' is checked after potentially deleting it above
|
|
|
+ return True
|
|
|
+
|
|
|
+ if node.get('role') in ['separator', 'LineBreak']:
|
|
|
+ return True
|
|
|
+ processed_name = ""
|
|
|
+ if 'name' in node:
|
|
|
+ processed_name:str =node.get('name') # type: ignore
|
|
|
+ processed_name = processed_name.replace(',', '')
|
|
|
+ processed_name = processed_name.replace(':', '')
|
|
|
+ processed_name = processed_name.replace('\n', '')
|
|
|
+ processed_name = processed_name.strip()
|
|
|
+ if len(processed_name) <3:
|
|
|
+ processed_name = ""
|
|
|
+
|
|
|
+ #check if the node only have name and role, then delete that node
|
|
|
+ if len(node) == 2 and 'name' in node and 'role' in node and not (node.get('role') == "text" and processed_name != ""):
|
|
|
+ return True
|
|
|
+ return False
|
|
|
+
|
|
|
+async def get_node_dom_element(page: Page, mmid: str):
|
|
|
+ return await page.evaluate("""
|
|
|
+ (mmid) => {
|
|
|
+ return document.querySelector(`[mmid="${mmid}"]`);
|
|
|
+ }
|
|
|
+ """, mmid)
|
|
|
+
|
|
|
+
|
|
|
+async def get_element_attributes(page: Page, mmid: str, attributes: list[str]):
|
|
|
+ return await page.evaluate("""
|
|
|
+ (inputParams) => {
|
|
|
+ const mmid = inputParams.mmid;
|
|
|
+ const attributes = inputParams.attributes;
|
|
|
+ const element = document.querySelector(`[mmid="${mmid}"]`);
|
|
|
+ if (!element) return null; // Return null if element is not found
|
|
|
+
|
|
|
+ let attrs = {};
|
|
|
+ for (let attr of attributes) {
|
|
|
+ attrs[attr] = element.getAttribute(attr);
|
|
|
+ }
|
|
|
+ return attrs;
|
|
|
+ }
|
|
|
+ """, {"mmid": mmid, "attributes": attributes})
|
|
|
+
|
|
|
+
|
|
|
+async def get_dom_with_accessibility_info() -> Annotated[dict[str, Any] | None, "A minified representation of the HTML DOM for the current webpage"]:
|
|
|
+ """
|
|
|
+ Retrieves, processes, and minifies the Accessibility tree of the active page in a browser instance.
|
|
|
+ Strictly follow the name and role tag for any interaction with the nodes.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ - The minified JSON content of the browser's active page.
|
|
|
+ """
|
|
|
+ logger.debug("Executing Get Accessibility Tree Command")
|
|
|
+ # Create and use the PlaywrightManager
|
|
|
+ browser_manager = PlaywrightManager(PlaywrightOptions())
|
|
|
+ page = await browser_manager.get_current_page()
|
|
|
+ if page is None: # type: ignore
|
|
|
+ raise ValueError('No active page found')
|
|
|
+
|
|
|
+ return await do_get_accessibility_info(page)
|
|
|
+
|
|
|
+
|
|
|
+async def do_get_accessibility_info(page: Page, only_input_fields: bool = False, log_dir: Optional[Path] = None):
|
|
|
+ """
|
|
|
+ Retrieves the accessibility information of a web page and saves it as JSON files.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ page (Page): The page object representing the web page.
|
|
|
+ only_input_fields (bool, optional): If True, only retrieves accessibility information for input fields.
|
|
|
+ Defaults to False.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ dict[str, Any] or None: The enhanced accessibility tree as a dictionary, or None if an error occurred.
|
|
|
+ """
|
|
|
+ await __inject_attributes(page)
|
|
|
+ accessibility_tree: dict[str, Any] = await page.accessibility.snapshot(interesting_only=True) # type: ignore
|
|
|
+
|
|
|
+ if log_dir is not None:
|
|
|
+ with open(os.path.join(log_dir, 'json_accessibility_dom.json'), 'w', encoding='utf-8') as f:
|
|
|
+ f.write(json.dumps(accessibility_tree, indent=2))
|
|
|
+ logger.debug("json_accessibility_dom.json saved")
|
|
|
+
|
|
|
+ await __cleanup_dom(page)
|
|
|
+ try:
|
|
|
+ enhanced_tree = await __fetch_dom_info(page, accessibility_tree, only_input_fields)
|
|
|
+
|
|
|
+ logger.debug("Enhanced Accessibility Tree ready")
|
|
|
+
|
|
|
+ if log_dir is not None:
|
|
|
+ with open(os.path.join(log_dir, 'json_accessibility_dom_enriched.json'), 'w', encoding='utf-8') as f:
|
|
|
+ f.write(json.dumps(enhanced_tree, indent=2))
|
|
|
+ logger.debug("json_accessibility_dom_enriched.json saved")
|
|
|
+
|
|
|
+ return enhanced_tree
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"Error while fetching DOM info: {e}")
|
|
|
+ traceback.print_exc()
|
|
|
+ return None
|