feat: fall back to unsplash cover when article contains no image

This commit is contained in:
Max Richter
2025-11-09 23:52:53 +01:00
parent 6c6b69a46a
commit 655fc648e6
27 changed files with 687 additions and 224 deletions

View File

@@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
extractYoutubeId,
@@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"
const log = createLogger("api/article");
/**
 * Looks up a cover image on Unsplash for the given article content.
 *
 * A search term is first requested via `openai.createUnsplashSearchTerm`,
 * then passed to `unsplash.getImageBySearchTerm`. Progress is reported
 * through `streamResponse.info`.
 *
 * @param content Article text the search term is generated from.
 * @param streamResponse Stream used to report progress messages.
 * @returns Whatever `unsplash.getImageBySearchTerm` resolves to, or
 *   `undefined` when no search term was produced or any step threw.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const term = await openai.createUnsplashSearchTerm(content);

    if (term) {
      streamResponse.info(`searching for ${term}`);
      // `return await` keeps a rejected lookup inside this try/catch.
      return await unsplash.getImageBySearchTerm(term);
    }
  } catch (err) {
    // Best effort: a failed lookup is logged and reported as "no image".
    log.error("Failed to get unsplash cover image", err);
  }

  return undefined;
}
/**
 * Determines the file extension to use for an image referenced by `str`.
 *
 * If `str` parses as a URL and carries an `fm` query parameter holding a
 * plain alphanumeric token (e.g. `?fm=jpg`), that token is returned.
 * Otherwise the extension is derived from the URL's pathname via
 * `fileExtension`. If `str` is not a valid URL (or anything above throws),
 * `fileExtension` is applied to the raw string instead.
 *
 * @param str Image URL or plain path/filename.
 * @returns The `fm` token, or the result of `fileExtension`.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    // The value comes from a remote URL and is interpolated into a file
    // path by the caller, so only accept a simple alphanumeric token.
    // Anything else (empty value, dots, slashes, ...) is ignored and the
    // pathname's extension is used instead.
    if (format !== null && /^[a-z0-9]+$/i.test(format)) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    // Not a parseable URL: treat the input as a plain path.
    return fileExtension(str);
  }
}
async function fetchAndStoreCover(
imageUrl: string | undefined,
title: string,
@@ -26,12 +56,12 @@ async function fetchAndStoreCover(
): Promise<string | undefined> {
if (!imageUrl) return;
const imagePath = `articles/images/${safeFileName(title)}_cover.${
fileExtension(imageUrl)
ext(imageUrl)
}`;
try {
streamResponse?.enqueue("downloading image");
streamResponse?.info("downloading image");
const res = await fetch(imageUrl);
streamResponse?.enqueue("saving image");
streamResponse?.info("saving image");
if (!res.ok) {
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
return;
@@ -53,38 +83,43 @@ async function processCreateArticle(
) {
log.info("create article from url", { url: fetchUrl });
streamResponse.enqueue("downloading article");
streamResponse.info("downloading article");
const doc = await webScrape(fetchUrl, streamResponse);
const result = await webScrape(fetchUrl, streamResponse);
const result = await Defuddle(doc, fetchUrl, {
markdown: true,
});
log.debug("downloaded and parse parsed", result);
log.debug("downloaded and parse parsed", {
...result,
url: fetchUrl,
content: result.content.slice(0, 200),
});
streamResponse.info("parsed article, creating tags with openai");
streamResponse.enqueue("parsed article, creating tags with openai");
const aiMeta = await openai.extractArticleMetadata(result.markdown);
const aiMeta = await openai.extractArticleMetadata(result.content);
streamResponse.enqueue("postprocessing article");
streamResponse.info("postprocessing article");
const title = result?.title || aiMeta?.headline || "";
const coverImagePath = await fetchAndStoreCover(
result.image,
title,
streamResponse,
);
let coverImagePath: string | undefined = undefined;
if (result?.image?.length) {
log.debug("using local image for cover image", { image: result.image });
coverImagePath = await fetchAndStoreCover(
result.image,
title,
streamResponse,
);
} else {
const urlPath = await getUnsplashCoverImage(
result.markdown,
streamResponse,
);
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
log.debug("using unsplash for cover image", { image: coverImagePath });
}
const url = toUrlSafeString(title);
const newArticle: ArticleResource["content"] = {
_type: "Article",
headline: title,
articleBody: result.content,
articleBody: result.markdown,
url: fetchUrl,
datePublished: formatDate(
result?.published || aiMeta?.datePublished || undefined,
@@ -100,16 +135,16 @@ async function processCreateArticle(
},
} as const;
streamResponse.enqueue("writing to disk");
streamResponse.info("writing to disk");
log.debug("writing to disk", {
...newArticle,
articleBody: newArticle.articleBody?.slice(0, 200),
});
await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);
await createResource(`articles/${url}.md`, newArticle);
streamResponse.enqueue("id: " + title);
streamResponse.send({ type: "finished", url });
}
async function processCreateYoutubeVideo(
@@ -122,13 +157,13 @@ async function processCreateYoutubeVideo(
url: fetchUrl,
});
streamResponse.enqueue("getting video infos from youtube api");
streamResponse.info("getting video infos from youtube api");
const youtubeId = extractYoutubeId(fetchUrl);
const video = await getYoutubeVideoDetails(youtubeId);
streamResponse.enqueue("shortening title with openai");
streamResponse.info("shortening title with openai");
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
video.snippet.title;
@@ -152,16 +187,18 @@ async function processCreateYoutubeVideo(
},
};
streamResponse.enqueue("creating article");
streamResponse.info("creating article");
const filename = toUrlSafeString(videoTitle);
await createResource(
`articles/${toUrlSafeString(videoTitle)}.md`,
`articles/${filename}.md`,
newArticle,
);
streamResponse.enqueue("finished");
streamResponse.info("finished");
streamResponse.enqueue("id: " + toUrlSafeString(videoTitle));
streamResponse.send({ type: "finished", url: filename });
}
export const handler: Handlers = {