feat: fall back to unsplash cover when article contains no image
This commit is contained in:
@@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node";
|
||||
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
|
||||
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
import * as unsplash from "@lib/unsplash.ts";
|
||||
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
||||
import {
|
||||
extractYoutubeId,
|
||||
@@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"
|
||||
|
||||
const log = createLogger("api/article");
|
||||
|
||||
/**
 * Looks up a replacement cover image on Unsplash for an article.
 *
 * A search term is requested from `openai.createUnsplashSearchTerm` for the
 * given content and, when one comes back, passed on to
 * `unsplash.getImageBySearchTerm`. Progress is reported through
 * `streamResponse.info`.
 *
 * This is a best-effort lookup: any error thrown along the way is logged and
 * turned into `undefined` instead of being propagated to the caller.
 *
 * @param content Article text the search term is derived from.
 * @param streamResponse Stream used to report progress messages.
 * @returns Whatever `unsplash.getImageBySearchTerm` resolves to (used by the
 *   caller as an image URL), or `undefined` when no search term was produced
 *   or the lookup failed.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);

    if (query) {
      streamResponse.info(`searching for ${query}`);
      // Await inside the try block so a rejected lookup is handled below.
      return await unsplash.getImageBySearchTerm(query);
    }
  } catch (err) {
    log.error("Failed to get unsplash cover image", err);
  }

  return undefined;
}
|
||||
|
||||
/**
 * Determines the file extension to use for an image reference.
 *
 * If `str` parses as a URL and carries an `fm` query parameter, that value is
 * used as the extension. Otherwise the extension is taken from the URL's
 * pathname. If `str` is not a parseable URL, the extension is derived from
 * the raw string.
 *
 * The `fm` value is only accepted when it is a plain alphanumeric token. The
 * result ends up inside a storage path, so an empty value (which would yield
 * a filename ending in ".") or a value containing separators such as "/" or
 * ".." must not be passed through; in those cases the pathname extension is
 * used instead.
 *
 * @param str Image URL or file name.
 * @returns The extension without a leading dot, as produced by the `fm`
 *   parameter or by `fileExtension`.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    if (format && /^[a-z0-9]+$/i.test(format)) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    // Not a valid absolute URL: treat the input as a plain path / file name.
    return fileExtension(str);
  }
}
|
||||
|
||||
async function fetchAndStoreCover(
|
||||
imageUrl: string | undefined,
|
||||
title: string,
|
||||
@@ -26,12 +56,12 @@ async function fetchAndStoreCover(
|
||||
): Promise<string | undefined> {
|
||||
if (!imageUrl) return;
|
||||
const imagePath = `articles/images/${safeFileName(title)}_cover.${
|
||||
fileExtension(imageUrl)
|
||||
ext(imageUrl)
|
||||
}`;
|
||||
try {
|
||||
streamResponse?.enqueue("downloading image");
|
||||
streamResponse?.info("downloading image");
|
||||
const res = await fetch(imageUrl);
|
||||
streamResponse?.enqueue("saving image");
|
||||
streamResponse?.info("saving image");
|
||||
if (!res.ok) {
|
||||
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
|
||||
return;
|
||||
@@ -53,38 +83,43 @@ async function processCreateArticle(
|
||||
) {
|
||||
log.info("create article from url", { url: fetchUrl });
|
||||
|
||||
streamResponse.enqueue("downloading article");
|
||||
streamResponse.info("downloading article");
|
||||
|
||||
const doc = await webScrape(fetchUrl, streamResponse);
|
||||
const result = await webScrape(fetchUrl, streamResponse);
|
||||
|
||||
const result = await Defuddle(doc, fetchUrl, {
|
||||
markdown: true,
|
||||
});
|
||||
log.debug("downloaded and parse parsed", result);
|
||||
|
||||
log.debug("downloaded and parse parsed", {
|
||||
...result,
|
||||
url: fetchUrl,
|
||||
content: result.content.slice(0, 200),
|
||||
});
|
||||
streamResponse.info("parsed article, creating tags with openai");
|
||||
|
||||
streamResponse.enqueue("parsed article, creating tags with openai");
|
||||
const aiMeta = await openai.extractArticleMetadata(result.markdown);
|
||||
|
||||
const aiMeta = await openai.extractArticleMetadata(result.content);
|
||||
|
||||
streamResponse.enqueue("postprocessing article");
|
||||
streamResponse.info("postprocessing article");
|
||||
|
||||
const title = result?.title || aiMeta?.headline || "";
|
||||
|
||||
const coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
let coverImagePath: string | undefined = undefined;
|
||||
if (result?.image?.length) {
|
||||
log.debug("using local image for cover image", { image: result.image });
|
||||
coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
} else {
|
||||
const urlPath = await getUnsplashCoverImage(
|
||||
result.markdown,
|
||||
streamResponse,
|
||||
);
|
||||
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
|
||||
log.debug("using unsplash for cover image", { image: coverImagePath });
|
||||
}
|
||||
|
||||
const url = toUrlSafeString(title);
|
||||
|
||||
const newArticle: ArticleResource["content"] = {
|
||||
_type: "Article",
|
||||
headline: title,
|
||||
articleBody: result.content,
|
||||
articleBody: result.markdown,
|
||||
url: fetchUrl,
|
||||
datePublished: formatDate(
|
||||
result?.published || aiMeta?.datePublished || undefined,
|
||||
@@ -100,16 +135,16 @@ async function processCreateArticle(
|
||||
},
|
||||
} as const;
|
||||
|
||||
streamResponse.enqueue("writing to disk");
|
||||
streamResponse.info("writing to disk");
|
||||
|
||||
log.debug("writing to disk", {
|
||||
...newArticle,
|
||||
articleBody: newArticle.articleBody?.slice(0, 200),
|
||||
});
|
||||
|
||||
await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);
|
||||
await createResource(`articles/${url}.md`, newArticle);
|
||||
|
||||
streamResponse.enqueue("id: " + title);
|
||||
streamResponse.send({ type: "finished", url });
|
||||
}
|
||||
|
||||
async function processCreateYoutubeVideo(
|
||||
@@ -122,13 +157,13 @@ async function processCreateYoutubeVideo(
|
||||
url: fetchUrl,
|
||||
});
|
||||
|
||||
streamResponse.enqueue("getting video infos from youtube api");
|
||||
streamResponse.info("getting video infos from youtube api");
|
||||
|
||||
const youtubeId = extractYoutubeId(fetchUrl);
|
||||
|
||||
const video = await getYoutubeVideoDetails(youtubeId);
|
||||
|
||||
streamResponse.enqueue("shortening title with openai");
|
||||
streamResponse.info("shortening title with openai");
|
||||
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
|
||||
video.snippet.title;
|
||||
|
||||
@@ -152,16 +187,18 @@ async function processCreateYoutubeVideo(
|
||||
},
|
||||
};
|
||||
|
||||
streamResponse.enqueue("creating article");
|
||||
streamResponse.info("creating article");
|
||||
|
||||
const filename = toUrlSafeString(videoTitle);
|
||||
|
||||
await createResource(
|
||||
`articles/${toUrlSafeString(videoTitle)}.md`,
|
||||
`articles/${filename}.md`,
|
||||
newArticle,
|
||||
);
|
||||
|
||||
streamResponse.enqueue("finished");
|
||||
streamResponse.info("finished");
|
||||
|
||||
streamResponse.enqueue("id: " + toUrlSafeString(videoTitle));
|
||||
streamResponse.send({ type: "finished", url: filename });
|
||||
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
|
||||
Reference in New Issue
Block a user