feat: fall back to Unsplash cover when article contains no image

This commit is contained in:
Max Richter
2025-11-09 23:52:53 +01:00
parent 6c6b69a46a
commit 655fc648e6
27 changed files with 687 additions and 224 deletions

View File

@@ -1,6 +1,8 @@
import { JSDOM } from "jsdom";
import { fetchHtmlWithPlaywright } from "./playwright.ts";
import { createStreamResponse } from "./helpers.ts";
import { Defuddle } from "defuddle/node";
import TurndownService from "turndown";
/**
* Mutates the given JSDOM instance: rewrites all relevant URL-bearing attributes
@@ -164,6 +166,8 @@ function absolutizeMetaRefresh(content: string, base: string): string {
return `${delay}; url=${abs}`;
}
// Single Turndown converter, constructed once at module load with no options
// and reused by webScrape to produce the `markdown` field from `result.content`.
const turndownService = new TurndownService();
export async function webScrape(
url: string,
streamResponse: ReturnType<typeof createStreamResponse>,
@@ -172,5 +176,12 @@ export async function webScrape(
const html = await fetchHtmlWithPlaywright(url, streamResponse);
const dom = new JSDOM(html);
absolutizeDomUrls(dom, u.origin);
return dom;
const result = await Defuddle(dom, url);
return {
...result,
dom,
markdown: turndownService.turndown(result.content),
};
}