memorium/lib/playwright.ts

57 lines
1.8 KiB
TypeScript
Raw Normal View History

2025-01-18 01:52:28 +01:00
import { firefox } from "npm:playwright-extra";
2025-01-18 00:46:05 +01:00
import { createStreamResponse } from "@lib/helpers.ts";
import StealthPlugin from "npm:puppeteer-extra-plugin-stealth";
/**
 * Pool of desktop Chrome user-agent strings (Windows and macOS variants).
 * One entry is picked at random for each new browser context so that
 * successive fetches do not all present the same user agent.
 *
 * NOTE(review): these are Chrome user agents, but the browser launched in this
 * module is Firefox — confirm the mismatch is intentional.
 */
const userAgentStrings: readonly string[] = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.2227.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.3497.92 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
];
2025-01-18 01:52:28 +01:00
// Register the stealth plugin on the shared Firefox launcher. This runs once
// when the module is loaded, so every `firefox.launch()` in this file uses it.
// NOTE(review): the stealth plugin originates from puppeteer-extra and appears
// to target Chromium — confirm its evasions actually take effect on Firefox.
firefox.use(StealthPlugin());
2025-01-18 00:46:05 +01:00
/**
 * Loads a URL in a Playwright-driven Firefox browser and returns the page HTML.
 *
 * A fresh browser is launched for every call. The page is opened in a new
 * context that uses a randomly chosen entry of `userAgentStrings` and an init
 * script that makes `navigator.webdriver` read as `undefined`.
 *
 * @param fetchUrl - Address of the page to load.
 * @param streamResponse - Progress sink; receives short status messages
 *   ("booting up playwright", "fetching html", "error fetching html").
 * @returns The serialized HTML of the page once `domcontentloaded` has fired,
 *   or an empty string if anything fails after the browser has been launched.
 * @throws Whatever `firefox.launch()` rejects with — launch failures are not
 *   caught here because there is no browser to clean up at that point.
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  streamResponse.enqueue("booting up playwright");

  // Launch the Playwright browser
  const browser = await firefox.launch();

  try {
    // Everything after a successful launch lives inside the try block so the
    // `finally` below always closes the browser, even if the status update
    // itself throws.
    streamResponse.enqueue("fetching html");

    // Open a new browser context with a randomly selected user agent
    const context = await browser.newContext({
      userAgent:
        userAgentStrings[Math.floor(Math.random() * userAgentStrings.length)],
    });

    // Hide the automation flag before any page script gets to run
    await context.addInitScript(
      "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})",
    );

    const page = await context.newPage();

    // Navigate to the URL; resolve as soon as the DOM has been parsed
    await page.goto(fetchUrl, {
      waitUntil: "domcontentloaded",
    });

    // Get the HTML content of the page
    return await page.content();
  } catch (error) {
    // Log first: if the stream can no longer be written to, the enqueue below
    // throws and the original error would otherwise be lost.
    console.error(error);
    streamResponse.enqueue("error fetching html");
    return "";
  } finally {
    // Close the browser (this also disposes the context and page)
    await browser.close();
  }
}