import { FreshContext, Handlers } from "$fresh/server.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
import { formatDate, safeFileName } from "@lib/string.ts";
import { createStreamResponse } from "@lib/helpers.ts";
import {
  AccessDeniedError,
  BadRequestError,
  NotFoundError,
} from "@lib/errors.ts";
import { createResource, fetchResource } from "@lib/marka/index.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { webScrape } from "@lib/webScraper.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { createLogger } from "@lib/log/index.ts";

/** Stream handle used to push progress messages to the client. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/**
 * Derives a file extension for an image reference.
 *
 * If `str` parses as a URL and carries an `fm` query parameter, that value is
 * returned as-is; otherwise the extension is taken from the URL path. If `str`
 * is not a valid URL, the extension is taken from the raw string.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    // `get` returns null only when the parameter is absent, so an empty
    // `fm=` value is still returned unchanged.
    if (format !== null) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}

const log = createLogger("api/article/enhance");

/**
 * Asks OpenAI for an Unsplash search term based on `content` and looks up an
 * image for it.
 *
 * Best effort: any failure is logged and reported as `undefined`.
 *
 * @param content Text handed to `openai.createUnsplashSearchTerm`.
 * @param streamResponse Stream used to report progress.
 * @returns The value from `unsplash.getImageBySearchTerm`, or `undefined` when
 *   no search term was produced or an error occurred.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: StreamResponse,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const searchTerm = await openai.createUnsplashSearchTerm(content);
    if (!searchTerm) return undefined;

    streamResponse.info(`searching for ${searchTerm}`);
    return await unsplash.getImageBySearchTerm(searchTerm);
  } catch (e) {
    log.error("Failed to get unsplash cover image", e);
    return undefined;
  }
}

/**
 * Downloads a remote image and stores it as the article's cover resource.
 *
 * Best effort: download and storage failures are logged and reported as
 * `undefined` instead of being thrown.
 *
 * @param imageUrl Remote image URL; `undefined` or empty short-circuits.
 * @param title Article title, used (via `safeFileName`) in the stored file name.
 * @param streamResponse Stream used to report progress.
 * @returns `resources/<path>` of the stored image, or `undefined` on failure.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: StreamResponse,
): Promise<string | undefined> {
  if (!imageUrl) return undefined;

  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;

  try {
    streamResponse.info("downloading cover");
    const res = await fetch(imageUrl);
    if (!res.ok) {
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      return undefined;
    }

    const buffer = await res.arrayBuffer();
    streamResponse.info("saving cover");
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    log.error(`Failed to save image: ${imageUrl}`, err);
    return undefined;
  }
}

/**
 * Re-scrapes the URL stored on an existing article and fills in its body,
 * publish date, author and cover image, then writes the article back.
 *
 * Existing `datePublished`, author name and `image` values are kept; only
 * missing ones are filled in. `articleBody` is always overwritten.
 *
 * @param name Resource name of the article below `articles/`.
 * @param streamResponse Stream used to report progress and the final result.
 * @throws NotFoundError When the article resource does not exist.
 * @throws BadRequestError When the article has no URL to scrape.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: StreamResponse,
) {
  // NOTE(review): `ArticleResource` is imported but unused; presumably this
  // call was meant to be typed with it — confirm against `fetchResource`.
  const article = await fetchResource(
    `articles/${name}`,
  );
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });
  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");
  log.debug("downloaded and parsed", result);

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  const title = result?.title || aiMeta?.headline ||
    article.content?.headline || "";

  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result?.published || aiMeta?.datePublished || undefined,
  );

  // A missing author, a missing name and an empty name are all falsy here.
  if (!article.content.author?.name) {
    article.content.author = {
      _type: "Person",
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result?.image?.length) {
      // Prefer the image reported by the scraper.
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      // Otherwise fall back to an Unsplash lookup.
      const urlPath = await getUnsplashCoverImage(
        result.content,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  // Log only the first 200 characters of the body to keep the entry short.
  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);

  // Strip a trailing ".md". The previous pattern put `$` before `.md`, which
  // can never match, so the extension was left in the reported URL.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}

/**
 * POST handler: starts enhancing the article named by `ctx.params.name` and
 * immediately returns the streaming response that reports its progress.
 *
 * The work runs in the background; failures are logged and sent over the
 * stream, and the stream's `cancel()` is called once the work settles.
 *
 * @throws AccessDeniedError When `ctx.state.session` is not set.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  const session = ctx.state.session;
  if (!session) {
    throw new AccessDeniedError();
  }

  const streamResponse = createStreamResponse();

  processEnhanceArticle(ctx.params.name, streamResponse)
    .catch((err) => {
      log.error(err);
      // A rejection is not guaranteed to be an Error; avoid sending
      // `undefined` as the message.
      const message = err instanceof Error ? err.message : String(err);
      streamResponse.error(message);
    })
    .finally(() => {
      streamResponse.cancel();
    });

  return streamResponse.response;
};

export const handler: Handlers = {
  POST,
};