feat: url scraper to recipe
This commit is contained in:
55
lib/playwright.ts
Normal file
55
lib/playwright.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { firefox } from "npm:playwright-extra";
|
||||
import { createStreamResponse } from "@lib/helpers.ts";
|
||||
import StealthPlugin from "npm:puppeteer-extra-plugin-stealth";
|
||||
|
||||
/**
 * Pool of desktop user-agent strings; one is picked at random for each new
 * browser context.
 *
 * The entries are Firefox user agents because this module only ever launches
 * Firefox. Advertising a Chrome/AppleWebKit user agent from a Gecko engine is
 * an inconsistency that a site can detect by comparing the header with the
 * browser's actual behaviour.
 */
const userAgentStrings = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/109.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/110.0",
];
|
||||
|
||||
// Register the stealth plugin on the shared `firefox` launcher. This runs once,
// as a side effect of importing this module, before any browser is launched.
// NOTE(review): the plugin is published for puppeteer-extra; presumably its
// evasions are meant to reduce automation fingerprints here too — confirm how
// many of them actually take effect under Firefox.
firefox.use(StealthPlugin());
|
||||
|
||||
/** Returns true only for strings that parse as an absolute http(s) URL. */
function isHttpUrl(candidate: string): boolean {
  try {
    const { protocol } = new URL(candidate);
    return protocol === "http:" || protocol === "https:";
  } catch {
    // `new URL` throws on anything that is not a parseable absolute URL.
    return false;
  }
}

/**
 * Loads a page in a Playwright-driven Firefox and returns its HTML.
 *
 * Status messages are pushed to `streamResponse` as the work progresses.
 * Only `http:` and `https:` URLs are fetched. Any other input (other schemes
 * such as `file:`, or strings that do not parse as a URL) is refused before a
 * browser is started, so a caller-supplied URL cannot be used to read local
 * files through the browser.
 *
 * @param fetchUrl - Absolute URL of the page to load.
 * @param streamResponse - Stream handle that receives the status messages.
 * @returns The page's HTML as of `domcontentloaded`, or `""` when the URL was
 *   refused or fetching failed (the error is logged and
 *   `"error fetching html"` is enqueued).
 * @throws Rejects if the browser itself cannot be launched or closed; those
 *   calls are outside the handled region.
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  // Refuse non-web URLs up front: no point booting a browser for them, and
  // schemes like `file:` must never reach `page.goto`.
  if (!isHttpUrl(fetchUrl)) {
    streamResponse.enqueue("error fetching html");
    console.error(new Error(`refusing to fetch non-http(s) URL: ${fetchUrl}`));
    return "";
  }

  streamResponse.enqueue("booting up playwright");
  // Launch the Playwright browser
  const browser = await firefox.launch();

  streamResponse.enqueue("fetching html");

  try {
    // Open a new browser context with a randomly chosen user agent
    const context = await browser.newContext({
      userAgent:
        userAgentStrings[Math.floor(Math.random() * userAgentStrings.length)],
    });

    // Hide the `navigator.webdriver` automation flag from every page script
    await context.addInitScript(
      "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})",
    );

    const page = await context.newPage();

    // Navigate to the URL
    await page.goto(fetchUrl, {
      waitUntil: "domcontentloaded", // Wait for the DOM to load
    });

    // Get the HTML content of the page
    const html = await page.content();

    return html;
  } catch (error) {
    streamResponse.enqueue("error fetching html");
    console.error(error);
    return "";
  } finally {
    // Close the browser (and with it the context and page) on every path
    await browser.close();
  }
}
|
||||
Reference in New Issue
Block a user