feat: fallback to unsplash cover when article contains no image
This commit is contained in:
@@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node";
|
||||
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
|
||||
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
import * as unsplash from "@lib/unsplash.ts";
|
||||
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
||||
import {
|
||||
extractYoutubeId,
|
||||
@@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"
|
||||
|
||||
// Module-level logger; every log line emitted by this route is created under the "api/article" name.
const log = createLogger("api/article");
|
||||
|
||||
/**
 * Best-effort lookup of a fallback cover image on Unsplash.
 *
 * Asks `openai.createUnsplashSearchTerm` for a search term derived from
 * `content`, then asks `unsplash.getImageBySearchTerm` for a matching image.
 * Progress is reported through `streamResponse.info`.
 *
 * @param content Article text the search term is derived from.
 * @param streamResponse Stream used to report progress to the client.
 * @returns The value returned by the Unsplash lookup, or `undefined` when no
 *   search term was produced or any step threw (the error is logged, never
 *   rethrown).
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  let imageUrl: string | undefined = undefined;
  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);
    if (query) {
      streamResponse.info(`searching for ${query}`);
      imageUrl = await unsplash.getImageBySearchTerm(query);
    }
  } catch (e) {
    // A missing cover must never fail article creation: log and carry on.
    log.error("Failed to get unsplash cover image", e);
    imageUrl = undefined;
  }
  return imageUrl;
}
|
||||
|
||||
/**
 * Derives the file extension to use when storing a remote image.
 *
 * For a parseable URL, the `fm` query parameter wins when it is a plain
 * alphanumeric token (e.g. `fm=jpg`); otherwise the extension is taken from
 * the URL's pathname so the query string never leaks into it. Input that is
 * not a valid absolute URL is handed to `fileExtension` unchanged.
 *
 * @param str Absolute image URL, or any string containing a file name.
 * @returns The `fm` value, or whatever `fileExtension` yields for the path.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    // The URL may come from a scraped page and the result becomes part of a
    // storage path, so an empty value (`fm=`) or anything containing path
    // characters (`fm=../x`) is ignored in favour of the pathname extension.
    if (format && /^[a-z0-9]+$/i.test(format)) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
|
||||
|
||||
async function fetchAndStoreCover(
|
||||
imageUrl: string | undefined,
|
||||
title: string,
|
||||
@@ -26,12 +56,12 @@ async function fetchAndStoreCover(
|
||||
): Promise<string | undefined> {
|
||||
if (!imageUrl) return;
|
||||
const imagePath = `articles/images/${safeFileName(title)}_cover.${
|
||||
fileExtension(imageUrl)
|
||||
ext(imageUrl)
|
||||
}`;
|
||||
try {
|
||||
streamResponse?.enqueue("downloading image");
|
||||
streamResponse?.info("downloading image");
|
||||
const res = await fetch(imageUrl);
|
||||
streamResponse?.enqueue("saving image");
|
||||
streamResponse?.info("saving image");
|
||||
if (!res.ok) {
|
||||
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
|
||||
return;
|
||||
@@ -53,38 +83,43 @@ async function processCreateArticle(
|
||||
) {
|
||||
log.info("create article from url", { url: fetchUrl });
|
||||
|
||||
streamResponse.enqueue("downloading article");
|
||||
streamResponse.info("downloading article");
|
||||
|
||||
const doc = await webScrape(fetchUrl, streamResponse);
|
||||
const result = await webScrape(fetchUrl, streamResponse);
|
||||
|
||||
const result = await Defuddle(doc, fetchUrl, {
|
||||
markdown: true,
|
||||
});
|
||||
log.debug("downloaded and parse parsed", result);
|
||||
|
||||
log.debug("downloaded and parse parsed", {
|
||||
...result,
|
||||
url: fetchUrl,
|
||||
content: result.content.slice(0, 200),
|
||||
});
|
||||
streamResponse.info("parsed article, creating tags with openai");
|
||||
|
||||
streamResponse.enqueue("parsed article, creating tags with openai");
|
||||
const aiMeta = await openai.extractArticleMetadata(result.markdown);
|
||||
|
||||
const aiMeta = await openai.extractArticleMetadata(result.content);
|
||||
|
||||
streamResponse.enqueue("postprocessing article");
|
||||
streamResponse.info("postprocessing article");
|
||||
|
||||
const title = result?.title || aiMeta?.headline || "";
|
||||
|
||||
const coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
let coverImagePath: string | undefined = undefined;
|
||||
if (result?.image?.length) {
|
||||
log.debug("using local image for cover image", { image: result.image });
|
||||
coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
} else {
|
||||
const urlPath = await getUnsplashCoverImage(
|
||||
result.markdown,
|
||||
streamResponse,
|
||||
);
|
||||
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
|
||||
log.debug("using unsplash for cover image", { image: coverImagePath });
|
||||
}
|
||||
|
||||
const url = toUrlSafeString(title);
|
||||
|
||||
const newArticle: ArticleResource["content"] = {
|
||||
_type: "Article",
|
||||
headline: title,
|
||||
articleBody: result.content,
|
||||
articleBody: result.markdown,
|
||||
url: fetchUrl,
|
||||
datePublished: formatDate(
|
||||
result?.published || aiMeta?.datePublished || undefined,
|
||||
@@ -100,16 +135,16 @@ async function processCreateArticle(
|
||||
},
|
||||
} as const;
|
||||
|
||||
streamResponse.enqueue("writing to disk");
|
||||
streamResponse.info("writing to disk");
|
||||
|
||||
log.debug("writing to disk", {
|
||||
...newArticle,
|
||||
articleBody: newArticle.articleBody?.slice(0, 200),
|
||||
});
|
||||
|
||||
await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);
|
||||
await createResource(`articles/${url}.md`, newArticle);
|
||||
|
||||
streamResponse.enqueue("id: " + title);
|
||||
streamResponse.send({ type: "finished", url });
|
||||
}
|
||||
|
||||
async function processCreateYoutubeVideo(
|
||||
@@ -122,13 +157,13 @@ async function processCreateYoutubeVideo(
|
||||
url: fetchUrl,
|
||||
});
|
||||
|
||||
streamResponse.enqueue("getting video infos from youtube api");
|
||||
streamResponse.info("getting video infos from youtube api");
|
||||
|
||||
const youtubeId = extractYoutubeId(fetchUrl);
|
||||
|
||||
const video = await getYoutubeVideoDetails(youtubeId);
|
||||
|
||||
streamResponse.enqueue("shortening title with openai");
|
||||
streamResponse.info("shortening title with openai");
|
||||
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
|
||||
video.snippet.title;
|
||||
|
||||
@@ -152,16 +187,18 @@ async function processCreateYoutubeVideo(
|
||||
},
|
||||
};
|
||||
|
||||
streamResponse.enqueue("creating article");
|
||||
streamResponse.info("creating article");
|
||||
|
||||
const filename = toUrlSafeString(videoTitle);
|
||||
|
||||
await createResource(
|
||||
`articles/${toUrlSafeString(videoTitle)}.md`,
|
||||
`articles/${filename}.md`,
|
||||
newArticle,
|
||||
);
|
||||
|
||||
streamResponse.enqueue("finished");
|
||||
streamResponse.info("finished");
|
||||
|
||||
streamResponse.enqueue("id: " + toUrlSafeString(videoTitle));
|
||||
streamResponse.send({ type: "finished", url: filename });
|
||||
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
|
||||
191
routes/api/articles/enhance/[name].ts
Normal file
191
routes/api/articles/enhance/[name].ts
Normal file
@@ -0,0 +1,191 @@
|
||||
import { FreshContext, Handlers } from "$fresh/server.ts";
|
||||
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
|
||||
import { formatDate, safeFileName } from "@lib/string.ts";
|
||||
import { createStreamResponse } from "@lib/helpers.ts";
|
||||
import {
|
||||
AccessDeniedError,
|
||||
BadRequestError,
|
||||
NotFoundError,
|
||||
} from "@lib/errors.ts";
|
||||
import { createResource, fetchResource } from "@lib/marka/index.ts";
|
||||
import { ArticleResource } from "@lib/marka/schema.ts";
|
||||
import { webScrape } from "@lib/webScraper.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
import * as unsplash from "@lib/unsplash.ts";
|
||||
import { createLogger } from "@lib/log/index.ts";
|
||||
|
||||
/**
 * Derives the file extension to use when storing a remote image.
 *
 * For a parseable URL, the `fm` query parameter wins when it is a plain
 * alphanumeric token (e.g. `fm=jpg`); otherwise the extension is taken from
 * the URL's pathname so the query string never leaks into it. Input that is
 * not a valid absolute URL is handed to `fileExtension` unchanged.
 *
 * @param str Absolute image URL, or any string containing a file name.
 * @returns The `fm` value, or whatever `fileExtension` yields for the path.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    // The URL may come from a scraped page and the result becomes part of a
    // storage path, so an empty value (`fm=`) or anything containing path
    // characters (`fm=../x`) is ignored in favour of the pathname extension.
    if (format && /^[a-z0-9]+$/i.test(format)) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
|
||||
|
||||
// Module-level logger; every log line emitted by this route is created under the "api/article/enhance" name.
const log = createLogger("api/article/enhance");
|
||||
|
||||
/**
 * Best-effort lookup of a fallback cover image on Unsplash.
 *
 * Asks `openai.createUnsplashSearchTerm` for a search term derived from
 * `content`, then asks `unsplash.getImageBySearchTerm` for a matching image.
 * Progress is reported through `streamResponse.info`.
 *
 * @param content Article text the search term is derived from.
 * @param streamResponse Stream used to report progress to the client.
 * @returns The value returned by the Unsplash lookup, or `undefined` when no
 *   search term was produced or any step threw (the error is logged, never
 *   rethrown).
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  let imageUrl: string | undefined = undefined;
  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);
    if (query) {
      streamResponse.info(`searching for ${query}`);
      imageUrl = await unsplash.getImageBySearchTerm(query);
    }
  } catch (e) {
    // A missing cover must never fail the enhancement: log and carry on.
    log.error("Failed to get unsplash cover image", e);
    imageUrl = undefined;
  }
  return imageUrl;
}
|
||||
|
||||
/**
 * Downloads a remote image and stores it as the cover of an article.
 *
 * The image is written via `createResource` to
 * `articles/images/<safeFileName(title)>_cover.<ext(imageUrl)>`.
 * Progress is reported through `streamResponse.info`.
 *
 * @param imageUrl Remote image URL; when falsy nothing is done.
 * @param title Article title the stored file name is derived from.
 * @param streamResponse Stream used to report progress to the client.
 * @returns `resources/<stored path>` on success, or `undefined` when there
 *   is no URL, the download answers with a non-OK status, or downloading /
 *   saving throws (failures are logged, never rethrown).
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return;
  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;
  try {
    streamResponse.info("downloading cover");
    const res = await fetch(imageUrl);
    if (!res.ok) {
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      // The body of a failed response is never read; cancel it so the
      // underlying connection/resource is released instead of lingering.
      await res.body?.cancel();
      return;
    }
    const buffer = await res.arrayBuffer();
    streamResponse.info("saving cover");
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    log.error(`Failed to save image: ${imageUrl}`, err);
    return;
  }
}
|
||||
|
||||
/**
 * Re-scrapes the source URL of an existing article and fills in what is
 * missing, then writes the article back to `articles/<name>`.
 *
 * - `articleBody` is always replaced with the freshly scraped markdown.
 * - `datePublished`, `author` and `image` are only set when the stored
 *   article does not already have them.
 * - When the scraped page offers no image, a cover is looked up on Unsplash.
 *
 * Progress is reported through `streamResponse.info`; on success a
 * `finished` message carrying the article name without its `.md` suffix is
 * sent.
 *
 * @param name Resource name of the article below `articles/`.
 * @param streamResponse Stream used to report progress and the final result.
 * @throws NotFoundError when no article is stored under `name`.
 * @throws BadRequestError when the stored article has no source URL.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  const article = await fetchResource<ArticleResource>(
    `articles/${name}`,
  );
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });
  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");

  log.debug("downloaded and parsed", result);

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  // Prefer the scraped title, then the AI headline, then what is stored.
  const title = result?.title || aiMeta?.headline ||
    article.content?.headline || "";

  // Defensive default; a missing `content` would already have failed the
  // URL check above.
  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result?.published || aiMeta?.datePublished || undefined,
  );

  if (!article.content.author?.name || article.content.author.name === "") {
    article.content.author = {
      _type: "Person",
      // Only the first "@" is rewritten, turning a leading handle such as
      // "@name" into "twitter:name".
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result?.image?.length) {
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      const urlPath = await getUnsplashCoverImage(
        result.content,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  // Log a truncated body so the debug output stays readable.
  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);
  // Strip a trailing ".md". The end anchor has to follow the literal: the
  // previous pattern `/$\.md/` could never match, so the suffix was kept.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}
|
||||
|
||||
/**
 * POST handler: starts enhancing the article named by the `name` route
 * parameter and immediately returns the streaming response that reports the
 * progress of that work.
 *
 * @param _req Incoming request (unused).
 * @param ctx Fresh context; supplies the session state and route params.
 * @returns The response object of the created stream.
 * @throws AccessDeniedError when the context carries no session.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  if (!ctx.state.session) {
    throw new AccessDeniedError();
  }

  const stream = createStreamResponse();

  // Not awaited on purpose: the work continues in the background while the
  // client reads the stream.
  const job = processEnhanceArticle(ctx.params.name, stream);
  job
    .catch((err) => {
      log.error(err);
      stream.error(err.message);
    })
    .finally(() => {
      stream.cancel();
    });

  return stream.response;
};
|
||||
|
||||
/** Route handlers exported for this route; only POST is handled. */
export const handler: Handlers = {
  POST: POST,
};
|
||||
Reference in New Issue
Block a user