feat: fall back to an Unsplash cover when the article contains no image
This commit is contained in:
@@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node";
|
||||
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
|
||||
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
import * as unsplash from "@lib/unsplash.ts";
|
||||
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
||||
import {
|
||||
extractYoutubeId,
|
||||
@@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"
|
||||
|
||||
const log = createLogger("api/article");
|
||||
|
||||
/**
 * Tries to find a cover image on Unsplash for the given article text.
 *
 * A search term is requested from the OpenAI helper first; if none comes
 * back, the Unsplash lookup is skipped. Progress is reported through
 * `streamResponse.info`.
 *
 * @param content Article text handed to `openai.createUnsplashSearchTerm`.
 * @param streamResponse Stream used to report progress to the client.
 * @returns Whatever `unsplash.getImageBySearchTerm` resolves to, or
 *   `undefined` when no search term was produced or any step threw.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);
    if (query) {
      streamResponse.info(`searching for ${query}`);
      // `return await` keeps a rejected lookup inside this try/catch.
      return await unsplash.getImageBySearchTerm(query);
    }
    return undefined;
  } catch (err) {
    // Best effort: a missing cover must not abort the surrounding import.
    log.error("Failed to get unsplash cover image", err);
    return undefined;
  }
}
|
||||
|
||||
/**
 * Derives a file extension for an image reference.
 *
 * If `str` parses as a URL and carries an `fm` query parameter, that
 * parameter's value is returned as-is. Otherwise the extension is taken
 * from the URL path via `fileExtension`. If anything in that process throws
 * (e.g. `str` is not a valid URL), `fileExtension` is applied to the raw
 * string instead.
 *
 * @param str URL or plain file name.
 * @returns The `fm` value, or the result of `fileExtension`.
 */
function ext(str: string) {
  try {
    const { searchParams, pathname } = new URL(str);
    // `get` yields null exactly when the parameter is absent.
    const format = searchParams.get("fm");
    return format !== null ? format : fileExtension(pathname);
  } catch (_err) {
    return fileExtension(str);
  }
}
|
||||
|
||||
async function fetchAndStoreCover(
|
||||
imageUrl: string | undefined,
|
||||
title: string,
|
||||
@@ -26,12 +56,12 @@ async function fetchAndStoreCover(
|
||||
): Promise<string | undefined> {
|
||||
if (!imageUrl) return;
|
||||
const imagePath = `articles/images/${safeFileName(title)}_cover.${
|
||||
fileExtension(imageUrl)
|
||||
ext(imageUrl)
|
||||
}`;
|
||||
try {
|
||||
streamResponse?.enqueue("downloading image");
|
||||
streamResponse?.info("downloading image");
|
||||
const res = await fetch(imageUrl);
|
||||
streamResponse?.enqueue("saving image");
|
||||
streamResponse?.info("saving image");
|
||||
if (!res.ok) {
|
||||
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
|
||||
return;
|
||||
@@ -53,38 +83,43 @@ async function processCreateArticle(
|
||||
) {
|
||||
log.info("create article from url", { url: fetchUrl });
|
||||
|
||||
streamResponse.enqueue("downloading article");
|
||||
streamResponse.info("downloading article");
|
||||
|
||||
const doc = await webScrape(fetchUrl, streamResponse);
|
||||
const result = await webScrape(fetchUrl, streamResponse);
|
||||
|
||||
const result = await Defuddle(doc, fetchUrl, {
|
||||
markdown: true,
|
||||
});
|
||||
log.debug("downloaded and parse parsed", result);
|
||||
|
||||
log.debug("downloaded and parse parsed", {
|
||||
...result,
|
||||
url: fetchUrl,
|
||||
content: result.content.slice(0, 200),
|
||||
});
|
||||
streamResponse.info("parsed article, creating tags with openai");
|
||||
|
||||
streamResponse.enqueue("parsed article, creating tags with openai");
|
||||
const aiMeta = await openai.extractArticleMetadata(result.markdown);
|
||||
|
||||
const aiMeta = await openai.extractArticleMetadata(result.content);
|
||||
|
||||
streamResponse.enqueue("postprocessing article");
|
||||
streamResponse.info("postprocessing article");
|
||||
|
||||
const title = result?.title || aiMeta?.headline || "";
|
||||
|
||||
const coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
let coverImagePath: string | undefined = undefined;
|
||||
if (result?.image?.length) {
|
||||
log.debug("using local image for cover image", { image: result.image });
|
||||
coverImagePath = await fetchAndStoreCover(
|
||||
result.image,
|
||||
title,
|
||||
streamResponse,
|
||||
);
|
||||
} else {
|
||||
const urlPath = await getUnsplashCoverImage(
|
||||
result.markdown,
|
||||
streamResponse,
|
||||
);
|
||||
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
|
||||
log.debug("using unsplash for cover image", { image: coverImagePath });
|
||||
}
|
||||
|
||||
const url = toUrlSafeString(title);
|
||||
|
||||
const newArticle: ArticleResource["content"] = {
|
||||
_type: "Article",
|
||||
headline: title,
|
||||
articleBody: result.content,
|
||||
articleBody: result.markdown,
|
||||
url: fetchUrl,
|
||||
datePublished: formatDate(
|
||||
result?.published || aiMeta?.datePublished || undefined,
|
||||
@@ -100,16 +135,16 @@ async function processCreateArticle(
|
||||
},
|
||||
} as const;
|
||||
|
||||
streamResponse.enqueue("writing to disk");
|
||||
streamResponse.info("writing to disk");
|
||||
|
||||
log.debug("writing to disk", {
|
||||
...newArticle,
|
||||
articleBody: newArticle.articleBody?.slice(0, 200),
|
||||
});
|
||||
|
||||
await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);
|
||||
await createResource(`articles/${url}.md`, newArticle);
|
||||
|
||||
streamResponse.enqueue("id: " + title);
|
||||
streamResponse.send({ type: "finished", url });
|
||||
}
|
||||
|
||||
async function processCreateYoutubeVideo(
|
||||
@@ -122,13 +157,13 @@ async function processCreateYoutubeVideo(
|
||||
url: fetchUrl,
|
||||
});
|
||||
|
||||
streamResponse.enqueue("getting video infos from youtube api");
|
||||
streamResponse.info("getting video infos from youtube api");
|
||||
|
||||
const youtubeId = extractYoutubeId(fetchUrl);
|
||||
|
||||
const video = await getYoutubeVideoDetails(youtubeId);
|
||||
|
||||
streamResponse.enqueue("shortening title with openai");
|
||||
streamResponse.info("shortening title with openai");
|
||||
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
|
||||
video.snippet.title;
|
||||
|
||||
@@ -152,16 +187,18 @@ async function processCreateYoutubeVideo(
|
||||
},
|
||||
};
|
||||
|
||||
streamResponse.enqueue("creating article");
|
||||
streamResponse.info("creating article");
|
||||
|
||||
const filename = toUrlSafeString(videoTitle);
|
||||
|
||||
await createResource(
|
||||
`articles/${toUrlSafeString(videoTitle)}.md`,
|
||||
`articles/${filename}.md`,
|
||||
newArticle,
|
||||
);
|
||||
|
||||
streamResponse.enqueue("finished");
|
||||
streamResponse.info("finished");
|
||||
|
||||
streamResponse.enqueue("id: " + toUrlSafeString(videoTitle));
|
||||
streamResponse.send({ type: "finished", url: filename });
|
||||
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
|
||||
191
routes/api/articles/enhance/[name].ts
Normal file
191
routes/api/articles/enhance/[name].ts
Normal file
@@ -0,0 +1,191 @@
|
||||
import { FreshContext, Handlers } from "$fresh/server.ts";
|
||||
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
|
||||
import { formatDate, safeFileName } from "@lib/string.ts";
|
||||
import { createStreamResponse } from "@lib/helpers.ts";
|
||||
import {
|
||||
AccessDeniedError,
|
||||
BadRequestError,
|
||||
NotFoundError,
|
||||
} from "@lib/errors.ts";
|
||||
import { createResource, fetchResource } from "@lib/marka/index.ts";
|
||||
import { ArticleResource } from "@lib/marka/schema.ts";
|
||||
import { webScrape } from "@lib/webScraper.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
import * as unsplash from "@lib/unsplash.ts";
|
||||
import { createLogger } from "@lib/log/index.ts";
|
||||
|
||||
/**
 * Picks the file extension to use when storing a downloaded image.
 *
 * For a parseable URL, an `fm` query parameter wins and its value is
 * returned unchanged; without it, `fileExtension` is applied to the URL's
 * path. When parsing (or the path lookup) throws, `fileExtension` is applied
 * to the unmodified input string.
 *
 * @param str URL or plain file name.
 * @returns The `fm` value, or the result of `fileExtension`.
 */
function ext(str: string) {
  try {
    const parsed = new URL(str);
    const fromQuery = parsed.searchParams.get("fm");
    // A null result means the parameter is not present at all.
    if (fromQuery === null) {
      return fileExtension(parsed.pathname);
    }
    return fromQuery;
  } catch (_err) {
    return fileExtension(str);
  }
}
|
||||
|
||||
const log = createLogger("api/article/enhance");
|
||||
|
||||
/**
 * Looks up a fallback cover image on Unsplash for an article.
 *
 * Asks the OpenAI helper for a search term based on `content`, then queries
 * Unsplash with it. Each step is announced via `streamResponse.info`.
 *
 * @param content Article text handed to `openai.createUnsplashSearchTerm`.
 * @param streamResponse Stream used to report progress to the client.
 * @returns The value resolved by `unsplash.getImageBySearchTerm`, or
 *   `undefined` if no search term was produced or an error occurred.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const term = await openai.createUnsplashSearchTerm(content);
    if (!term) {
      return undefined;
    }

    streamResponse.info(`searching for ${term}`);
    const imageUrl = await unsplash.getImageBySearchTerm(term);
    return imageUrl;
  } catch (error) {
    // Failures are logged and swallowed so the caller can continue without
    // a cover image.
    log.error("Failed to get unsplash cover image", error);
    return undefined;
  }
}
|
||||
|
||||
/**
 * Downloads a remote image and stores it as the article's cover.
 *
 * The target path is `articles/images/<safe title>_cover.<ext>`, where the
 * extension comes from `ext(imageUrl)`.
 *
 * @param imageUrl Remote image URL; a falsy value short-circuits to
 *   `undefined` without any network access.
 * @param title Article title used to build the file name.
 * @param streamResponse Stream used to report progress to the client.
 * @returns `resources/<stored path>` on success, otherwise `undefined`
 *   (non-OK HTTP status, or any error while fetching or saving).
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) {
    return undefined;
  }

  const baseName = safeFileName(title);
  const extension = ext(imageUrl);
  const targetPath = `articles/images/${baseName}_cover.${extension}`;

  try {
    streamResponse.info("downloading cover");
    const response = await fetch(imageUrl);

    if (response.ok) {
      const bytes = await response.arrayBuffer();
      streamResponse.info("saving cover");
      await createResource(targetPath, bytes);
      return `resources/${targetPath}`;
    }

    log.error(`Failed to download remote image: ${imageUrl}`, {
      status: response.status,
    });
    return undefined;
  } catch (error) {
    // Network and storage errors are logged; the caller just gets no cover.
    log.error(`Failed to save image: ${imageUrl}`, error);
    return undefined;
  }
}
|
||||
|
||||
/**
 * Re-scrapes the source URL of a stored article and enriches the resource.
 *
 * Steps, each announced through `streamResponse.info`:
 *  1. load `articles/<name>` and read its `url`,
 *  2. scrape that URL and extract metadata with OpenAI,
 *  3. overwrite `articleBody` with the scraped markdown,
 *  4. fill `datePublished`, `author` and `image` only when they are missing,
 *  5. write the resource back and send a `finished` event.
 *
 * @param name Resource file name below `articles/`.
 * @param streamResponse Stream used to report progress and the final event.
 * @throws NotFoundError when the article resource does not exist.
 * @throws BadRequestError when the article has no source URL.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  const article = await fetchResource<ArticleResource>(
    `articles/${name}`,
  );
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });
  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");

  log.debug("downloaded and parsed", result);

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  // Preference order: scraped title, AI headline, already stored headline.
  const title = result?.title || aiMeta?.headline ||
    article.content?.headline || "";

  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  // The body is always refreshed; the remaining fields are only filled in
  // when the stored article does not have them yet.
  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result?.published || aiMeta?.datePublished || undefined,
  );

  // `!name` already covers the empty string.
  if (!article.content.author?.name) {
    article.content.author = {
      _type: "Person",
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result?.image?.length) {
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      // No image in the scraped page: fall back to an Unsplash search.
      const urlPath = await getUnsplashCoverImage(
        result.content,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  // Truncate the body so the debug log stays readable.
  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);
  // The `$` anchor has to come after the literal `.md`; placed in front of
  // it the pattern can never match and the suffix is never stripped.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}
|
||||
|
||||
/**
 * POST handler that starts enhancing the article named by the `name` route
 * parameter and streams progress back to the client.
 *
 * The work runs in the background; the streaming response is returned
 * immediately. Errors from the background job are logged and forwarded to
 * the stream, and the stream is cancelled once the job settles.
 *
 * @throws AccessDeniedError when the request has no session.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  if (!ctx.state.session) {
    throw new AccessDeniedError();
  }

  const streamResponse = createStreamResponse();

  // Deliberately not awaited: the response must be returned while the job
  // is still running so progress can be streamed.
  const job = processEnhanceArticle(ctx.params.name, streamResponse);
  job
    .catch((err) => {
      log.error(err);
      streamResponse.error(err.message);
    })
    .finally(() => {
      streamResponse.cancel();
    });

  return streamResponse.response;
};
|
||||
|
||||
/** Route handlers for this endpoint; only POST is registered. */
export const handler: Handlers = { POST: POST };
|
||||
@@ -2,7 +2,7 @@ import { Handlers } from "$fresh/server.ts";
|
||||
import { json } from "@lib/helpers.ts";
|
||||
|
||||
export const handler: Handlers = {
|
||||
async GET() {
|
||||
GET() {
|
||||
return json([]);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -10,7 +10,6 @@ import { parseJsonLdToRecipeSchema } from "./parseJsonLd.ts";
|
||||
import z from "zod";
|
||||
import { createResource } from "@lib/marka/index.ts";
|
||||
import { webScrape } from "@lib/webScraper.ts";
|
||||
import { Defuddle } from "defuddle/node";
|
||||
import { RecipeResource } from "@lib/marka/schema.ts";
|
||||
|
||||
const log = createLogger("api/article");
|
||||
@@ -23,18 +22,14 @@ async function processCreateRecipeFromUrl(
|
||||
) {
|
||||
log.info("create article from url", { url: fetchUrl });
|
||||
|
||||
streamResponse.enqueue("downloading article");
|
||||
streamResponse.info("downloading article");
|
||||
|
||||
const doc = await webScrape(fetchUrl, streamResponse);
|
||||
const result = await webScrape(fetchUrl, streamResponse);
|
||||
|
||||
const result = await Defuddle(doc, fetchUrl, {
|
||||
markdown: true,
|
||||
});
|
||||
|
||||
streamResponse.enqueue("download success");
|
||||
streamResponse.info("download success");
|
||||
|
||||
const jsonLds = Array.from(
|
||||
doc?.querySelectorAll(
|
||||
result.dom?.querySelectorAll(
|
||||
"script[type='application/ld+json']",
|
||||
),
|
||||
) as unknown as HTMLScriptElement[];
|
||||
@@ -48,11 +43,11 @@ async function processCreateRecipeFromUrl(
|
||||
}
|
||||
|
||||
if (!recipe) {
|
||||
const res = await openai.extractRecipe(result.content);
|
||||
const res = await openai.extractRecipe(result.markdown);
|
||||
if (!res || "errorMessages" in res) {
|
||||
const errorMessage = res?.errorMessages?.[0] ||
|
||||
"could not extract recipe";
|
||||
streamResponse.enqueue(`failed to extract recipe: ${errorMessage}`);
|
||||
streamResponse.error(`failed to extract recipe: ${errorMessage}`);
|
||||
return;
|
||||
}
|
||||
recipe = res;
|
||||
@@ -61,7 +56,7 @@ async function processCreateRecipeFromUrl(
|
||||
const id = toUrlSafeString(recipe?.name || "");
|
||||
|
||||
if (!recipe) {
|
||||
streamResponse.enqueue("failed to parse recipe");
|
||||
streamResponse.error("failed to parse recipe");
|
||||
streamResponse.cancel();
|
||||
return;
|
||||
}
|
||||
@@ -80,11 +75,11 @@ async function processCreateRecipeFromUrl(
|
||||
const finalPath = `resources/recipes/images/${
|
||||
safeFileName(id)
|
||||
}_cover.${extension}`;
|
||||
streamResponse.enqueue("downloading image");
|
||||
streamResponse.info("downloading image");
|
||||
try {
|
||||
streamResponse.enqueue("downloading image");
|
||||
streamResponse.info("downloading image");
|
||||
const res = await fetch(newRecipe.image);
|
||||
streamResponse.enqueue("saving image");
|
||||
streamResponse.info("saving image");
|
||||
const buffer = await res.arrayBuffer();
|
||||
await createResource(finalPath, buffer);
|
||||
newRecipe.image = finalPath;
|
||||
@@ -93,11 +88,11 @@ async function processCreateRecipeFromUrl(
|
||||
}
|
||||
}
|
||||
|
||||
streamResponse.enqueue("finished processing, creating file");
|
||||
streamResponse.info("finished processing, creating file");
|
||||
|
||||
await createResource(`recipes/${id}.md`, newRecipe);
|
||||
|
||||
streamResponse.enqueue("id: " + id);
|
||||
streamResponse.send({ type: "finished", url: id });
|
||||
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
@@ -119,7 +114,7 @@ export const handler: Handlers = {
|
||||
processCreateRecipeFromUrl({ fetchUrl, streamResponse }).then((article) => {
|
||||
log.debug("created article from link", { article });
|
||||
}).catch((err) => {
|
||||
streamResponse.enqueue(`error creating recipe: ${err}`);
|
||||
streamResponse.error(`creating recipe: ${err}`);
|
||||
log.error(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
|
||||
@@ -21,7 +21,7 @@ async function processUpdateRecommendations(
|
||||
return true;
|
||||
}) as ReviewResource[];
|
||||
|
||||
streamResponse.enqueue("Fetched all movies");
|
||||
streamResponse.info("fetched all movies");
|
||||
|
||||
let done = 0;
|
||||
const total = movies.length;
|
||||
@@ -41,7 +41,7 @@ async function processUpdateRecommendations(
|
||||
console.log(err);
|
||||
}
|
||||
done++;
|
||||
streamResponse.enqueue(
|
||||
streamResponse.info(
|
||||
`${Math.floor((done / total) * 100)}% [${
|
||||
done + 1
|
||||
}/${total}] ${movie.name}`,
|
||||
@@ -50,7 +50,7 @@ async function processUpdateRecommendations(
|
||||
console.log(err);
|
||||
});
|
||||
|
||||
streamResponse.enqueue("100% Finished");
|
||||
streamResponse.info("100% Finished");
|
||||
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
|
||||
Reference in New Issue
Block a user