feat: fall back to Unsplash cover when article contains no image
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import { JSDOM } from "jsdom";
|
||||
import { fetchHtmlWithPlaywright } from "./playwright.ts";
|
||||
import { createStreamResponse } from "./helpers.ts";
|
||||
import { Defuddle } from "defuddle/node";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
/**
|
||||
* Mutates the given JSDOM instance: rewrites all relevant URL-bearing attributes
|
||||
@@ -164,6 +166,8 @@ function absolutizeMetaRefresh(content: string, base: string): string {
|
||||
return `${delay}; url=${abs}`;
|
||||
}
|
||||
|
||||
// Module-level Turndown converter instance, created once with default options.
// webScrape passes the extracted `result.content` through it to populate the
// `markdown` field of its return value.
const turndownService = new TurndownService();
|
||||
|
||||
export async function webScrape(
|
||||
url: string,
|
||||
streamResponse: ReturnType<typeof createStreamResponse>,
|
||||
@@ -172,5 +176,12 @@ export async function webScrape(
|
||||
const html = await fetchHtmlWithPlaywright(url, streamResponse);
|
||||
const dom = new JSDOM(html);
|
||||
absolutizeDomUrls(dom, u.origin);
|
||||
return dom;
|
||||
|
||||
const result = await Defuddle(dom, url);
|
||||
|
||||
return {
|
||||
...result,
|
||||
dom,
|
||||
markdown: turndownService.turndown(result.content),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user