Skip to content

Commit

Permalink
fix(puppeteer driver): clean the view-hierarchy from unused data.
Browse files Browse the repository at this point in the history
  • Loading branch information
asafkorem committed Jan 25, 2025
1 parent 88b67ae commit d8a3a47
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 13 deletions.
4 changes: 1 addition & 3 deletions examples/puppeteer/tests/example.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ describe("Example Test Suite", () => {

it("perform test with pilot", async () => {
await copilot.pilot(
"Enter https://example.com/, press on more information, " +
"expect to be redirected to IANA site, summarize the findings. " +
"Open in non-headless mode.",
"Enter https://freshuk.co.il/ and check the price of tomatoes, use non-headless mode",
);
});
});
71 changes: 71 additions & 0 deletions src/drivers/puppeteer/getCleanDOM.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import * as puppeteer from "puppeteer";

/**
 * Build a trimmed-down HTML snapshot of the page body.
 *
 * All work is done on a clone of the document, so the live page is never mutated:
 * - Removes hidden elements (`hidden`, `aria-hidden="true"`)
 * - Removes scripts, styles, iframes, ads/analytics/tracking elements and page chrome
 * - Removes every attribute that is not allow-listed (any `data-*` attribute is kept)
 * - Removes elements left without content, except elements that are meaningful
 *   while empty (`<img>`, `<input>`, `<textarea>`)
 *
 * @param page - Puppeteer page whose current DOM is snapshotted.
 * @returns The cleaned body markup with whitespace runs collapsed to single spaces,
 * or an empty string when the document has no body.
 */
export default async function getCleanDOM(
  page: puppeteer.Page,
): Promise<string> {
  // The callback is evaluated in the page context, so every helper and constant
  // it uses has to be declared inside it.
  return await page.evaluate(() => {
    const copiedDocument = document.cloneNode(true) as Document;

    // NOTE(review): '[class*="ads"]' / '[id*="ads"]' are substring matches and
    // therefore also hit names such as "downloads" or "threads" — confirm this
    // is acceptable before tightening.
    const removeSelectors = [
      "[hidden]",
      '[aria-hidden="true"]',
      "script",
      "style",
      "link",
      "meta",
      "noscript",
      "iframe",
      '[class*="ads"]',
      '[id*="ads"]',
      '[class*="analytics"]',
      '[class*="tracking"]',
      "footer",
      "header",
      "nav",
      "path",
      "aside",
    ];

    const allowedAttributes = new Set([
      "src",
      "href",
      "alt",
      "title",
      "aria-label",
      "aria-labelledby",
      "aria-describedby",
      "aria-hidden",
      "role",
      "class",
      "id",
    ]);

    // "data-*" is a family of attributes, so it needs a prefix test rather than
    // an exact-name lookup.
    const isAllowedAttribute = (name: string): boolean =>
      allowedAttributes.has(name) || name.startsWith("data-");

    // These elements never have inner markup (or are legitimately blank), yet
    // they carry content or are interaction targets.
    const keepWhenEmpty = new Set(["img", "input", "textarea"]);

    // Drop unwanted subtrees first: nothing inside them needs cleaning, and
    // wrappers that only contained them become empty and get pruned below.
    removeSelectors.forEach((selector) => {
      copiedDocument.querySelectorAll(selector).forEach((el) => el.remove());
    });

    // Walk in reverse document order so children are handled before their
    // parents; a parent emptied by removing its children is then removed too.
    Array.from(copiedDocument.querySelectorAll("*"))
      .reverse()
      .forEach((el) => {
        const isRoot =
          el === copiedDocument.documentElement || el === copiedDocument.body;
        const isEmpty = !el.innerHTML.trim();

        // Never remove <html>/<body>: the body is what gets serialized at the end.
        if (isEmpty && !isRoot && !keepWhenEmpty.has(el.tagName.toLowerCase())) {
          el.remove();
          return;
        }

        Array.from(el.attributes).forEach((attr) => {
          if (!isAllowedAttribute(attr.name)) {
            el.removeAttribute(attr.name);
          }
        });
      });

    const body = copiedDocument.body;
    if (!body) {
      return "";
    }

    return body.innerHTML.replace(/\s+/g, " ").trim();
  });
}
3 changes: 2 additions & 1 deletion src/drivers/puppeteer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { TestingFrameworkAPICatalog, TestingFrameworkDriver } from "@/types";
import * as puppeteer from "puppeteer";
import path from "path";
import fs from "fs";
import getCleanDOM from "./getCleanDOM";

export class PuppeteerFrameworkDriver implements TestingFrameworkDriver {
private currentPage?: puppeteer.Page;
Expand Down Expand Up @@ -59,7 +60,7 @@ export class PuppeteerFrameworkDriver implements TestingFrameworkDriver {
);
}

return await this.currentPage.content();
return await getCleanDOM(this.currentPage);
}

/**
Expand Down
11 changes: 2 additions & 9 deletions src/utils/CodeEvaluator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@ export class CodeEvaluator {
context: any,
sharedContext: Record<string, any> = {},
): Promise<CodeEvaluationResult> {
const loggerSpinner = logger.startSpinner({
message: `Copilot evaluating code: \n\`\`\`\n${code}\n\`\`\`\n`,
isBold: false,
color: "gray",
});

const asyncFunction = this.createAsyncFunction(
code,
context,
Expand All @@ -22,12 +16,11 @@ export class CodeEvaluator {

try {
const result = await asyncFunction();
loggerSpinner.stop("success", `Copilot evaluated the code successfully`);

return { code, result, sharedContext };
} catch (error) {
loggerSpinner.stop("failure", {
message: `Copilot failed to evaluate the code: \n\`\`\`\n${code}\n\`\`\``,
logger.error({
message: `\nCopilot failed to evaluate the code: \n\`\`\`\n${code}\n\`\`\``,
isBold: false,
color: "gray",
});
Expand Down

0 comments on commit d8a3a47

Please sign in to comment.