import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
  extractYoutubeId,
  formatDate,
  isYoutubeLink,
  safeFileName,
  toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";

const log = createLogger("api/article");

/** The progress stream handed to every processing step of this route. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/**
 * Asks OpenAI for an Unsplash search term matching `content` and resolves it
 * to an image URL.
 *
 * This is best-effort: any failure is logged and reported as `undefined`
 * rather than thrown.
 *
 * @param content Article text used to derive the search term.
 * @param streamResponse Progress stream that receives status messages.
 * @returns The image URL, or `undefined` when no term/image could be obtained.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: StreamResponse,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const searchTerm = await openai.createUnsplashSearchTerm(content);
    if (!searchTerm) return undefined;

    streamResponse.info(`searching for ${searchTerm}`);
    return await unsplash.getImageBySearchTerm(searchTerm);
  } catch (e) {
    log.error("Failed to get unsplash cover image", e);
    return undefined;
  }
}

/**
 * Determines the file extension to use for an image reference.
 *
 * For a parseable URL a non-empty `fm` query parameter wins; otherwise the
 * extension is derived from the URL path. Input that is not a valid URL is
 * passed to `fileExtension` as-is.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    const format = u.searchParams.get("fm");
    // An empty `fm=` would produce a file name ending in a bare dot, so it is
    // treated the same as a missing parameter.
    if (format) return format;
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}

/**
 * Downloads `imageUrl` and stores it as the cover image for `title`.
 *
 * This is best-effort: a missing URL, a non-OK response or any thrown error
 * results in `undefined`, never in a rejection from the download/save step.
 *
 * @param imageUrl Remote image location; `undefined` short-circuits.
 * @param title Title used to build the stored file name.
 * @param streamResponse Optional progress stream for status messages.
 * @returns The `resources/…` path of the stored image, or `undefined`.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse?: StreamResponse,
): Promise<string | undefined> {
  if (!imageUrl) return undefined;

  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;

  try {
    streamResponse?.info("downloading image");
    const res = await fetch(imageUrl);

    if (!res.ok) {
      // Release the unread body so the underlying resource is not leaked.
      await res.body?.cancel();
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      return undefined;
    }

    // Only announce the save step once there is actually something to save.
    streamResponse?.info("saving image");
    const buffer = await res.arrayBuffer();
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    log.error(`Failed to save image: ${imageUrl}`, err);
    return undefined;
  }
}

/**
 * Scrapes `fetchUrl`, enriches the result with OpenAI-extracted metadata,
 * stores a cover image and writes the article as `articles/<slug>.md`.
 *
 * The cover comes from the scraped page when it provides an image, otherwise
 * from an Unsplash search. A `finished` event carrying the slug is sent on the
 * stream once the article is written.
 *
 * Errors from scraping, metadata extraction or writing the article propagate
 * to the caller.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
) {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.info("downloading article");
  const result = await webScrape(fetchUrl, streamResponse);
  log.debug("downloaded and parse parsed", result);

  streamResponse.info("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  streamResponse.info("postprocessing article");

  // NOTE(review): when neither source yields a title this stays "" and the
  // article is written with an empty slug — confirm whether that is intended.
  const title = result.title || aiMeta?.headline || "";

  let coverImagePath: string | undefined = undefined;
  if (result.image?.length) {
    log.debug("using local image for cover image", { image: result.image });
    coverImagePath = await fetchAndStoreCover(
      result.image,
      title,
      streamResponse,
    );
  } else {
    const urlPath = await getUnsplashCoverImage(
      result.markdown,
      streamResponse,
    );
    coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
    log.debug("using unsplash for cover image", { image: coverImagePath });
  }

  const url = toUrlSafeString(title);

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.markdown,
    url: fetchUrl,
    datePublished: formatDate(
      result.published || aiMeta?.datePublished || undefined,
    ),
    image: coverImagePath,
    author: {
      _type: "Person",
      // Rewrites the first "@" of the author name to a "twitter:" prefix.
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    },
  };

  streamResponse.info("writing to disk");
  log.debug("writing to disk", {
    ...newArticle,
    // Keep the log entry small: only the first 200 characters of the body.
    articleBody: newArticle.articleBody?.slice(0, 200),
  });

  await createResource(`articles/${url}.md`, newArticle);

  streamResponse.send({ type: "finished", url });
}

/**
 * Creates an article from a YouTube link: loads the video details, shortens
 * the title with OpenAI, stores the thumbnail as cover and writes the article
 * as `articles/<slug>.md`.
 *
 * A `finished` event carrying the slug is sent on the stream once the article
 * is written. Errors from the video lookup, title shortening or writing the
 * article propagate to the caller.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
) {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.info("getting video infos from youtube api");

  const youtubeId = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.info("shortening title with openai");
  const videoTitle = await openai.shortenTitle(video.snippet.title) ||
    video.snippet.title;

  // The maxres thumbnail may be absent; in that case no cover is stored
  // instead of failing the whole import.
  const thumbnail = video?.snippet?.thumbnails?.maxres;
  const coverImagePath = await fetchAndStoreCover(
    thumbnail?.url,
    videoTitle,
    streamResponse,
  );

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    image: coverImagePath,
    url: fetchUrl,
    datePublished: formatDate(video.snippet.publishedAt),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.info("creating article");

  const filename = toUrlSafeString(videoTitle);

  await createResource(
    `articles/${filename}.md`,
    newArticle,
  );

  streamResponse.info("finished");

  streamResponse.send({ type: "finished", url: filename });
}

/**
 * Route handler: `GET ?url=<link>` starts importing the linked page or YouTube
 * video as an article and returns the progress stream response immediately.
 *
 * Throws `AccessDeniedError` when there is no session and `BadRequestError`
 * when the `url` parameter is missing or invalid.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    const session = ctx.state.session;
    if (!session) {
      throw new AccessDeniedError();
    }

    const url = new URL(req.url);
    const fetchUrl = url.searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    const isYoutube = isYoutubeLink(fetchUrl);
    const job = isYoutube
      ? processCreateYoutubeVideo({ fetchUrl, streamResponse })
      : processCreateArticle({ fetchUrl, streamResponse });
    const doneMessage = isYoutube
      ? "created article from youtube"
      : "created article from link";

    // Deliberately not awaited: the work continues in the background while the
    // stream response is handed back. Failures are logged and the stream is
    // always closed afterwards.
    void job
      .then((article) => {
        log.debug(doneMessage, { article });
      })
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};