memorium/lib/playwright.ts
2025-01-19 20:25:47 +01:00

60 lines
1.9 KiB
TypeScript

import { firefox } from "npm:playwright-extra";
import { createStreamResponse } from "@lib/helpers.ts";
import StealthPlugin from "npm:puppeteer-extra-plugin-stealth";
import * as env from "@lib/env.ts";
/**
 * Desktop Chrome user-agent strings (Windows and macOS variants).
 *
 * NOTE(review): nothing in the visible part of this file reads this list —
 * confirm whether it was meant to be passed when creating the browser context.
 */
const userAgentStrings = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.2227.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.3497.92 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
];

// Register the stealth plugin on the playwright-extra Firefox launcher at
// module load, so it is in place before any browser is launched below.
firefox.use(StealthPlugin());
/**
 * Loads a URL in a Firefox instance launched through playwright-extra and
 * returns the page's HTML once the DOM content has loaded.
 *
 * Progress messages ("booting up playwright", "fetching html", and
 * "error fetching html" on failure) are pushed to `streamResponse`.
 *
 * When `env.PROXY_SERVER` is set, the browser is launched with that proxy and
 * the credentials from `env.PROXY_USERNAME` / `env.PROXY_PASSWORD`.
 *
 * @param fetchUrl - The URL to navigate to.
 * @param streamResponse - Stream used to report progress to the caller.
 * @returns The page HTML, or an empty string if opening the page, navigating,
 *   or reading its content fails (the error is logged, not rethrown).
 * @throws If the browser cannot be launched; the launch happens outside the
 *   try/catch, so that failure propagates to the caller.
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  streamResponse.enqueue("booting up playwright");

  const config: Parameters<typeof firefox.launch>[0] = {};
  if (env.PROXY_SERVER) {
    config.proxy = {
      server: env.PROXY_SERVER,
      username: env.PROXY_USERNAME,
      password: env.PROXY_PASSWORD,
    };
    // Log only the proxy address: dumping the whole config would write the
    // proxy username and password into the logs.
    console.log("Using proxy server", env.PROXY_SERVER);
  }

  // Launch the Playwright browser
  const browser = await firefox.launch(config);
  streamResponse.enqueue("fetching html");

  try {
    // Open a new browser context and page
    const context = await browser.newContext();
    const page = await context.newPage();

    // Navigate to the URL
    await page.goto(fetchUrl, {
      waitUntil: "domcontentloaded", // Wait for the DOM to load
    });

    // Get the HTML content of the page
    const html = await page.content();
    return html;
  } catch (error) {
    streamResponse.enqueue("error fetching html");
    console.error(error);
    // Failures are reported as an empty document rather than an exception.
    return "";
  } finally {
    // Close the browser on both the success and the failure path
    await browser.close();
  }
}