import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
  extractYoutubeId,
  formatDate,
  isYoutubeLink,
  safeFileName,
  toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";

const log = createLogger("api/article");

/** Progress stream handed to the workers; derived from the helper that creates it. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/**
 * Best-effort download of a cover image that is then stored via `createResource`.
 *
 * @param imageUrl Remote image location; when falsy nothing is downloaded.
 * @param title Title used (through `safeFileName`) to build the stored file name.
 * @param streamResponse Optional progress stream that receives status messages.
 * @returns `resources/<stored path>` on success, or `undefined` when there is
 *   no URL, the server answers with a non-OK status, or any step throws.
 *   Failures are logged and never propagated to the caller.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse?: StreamResponse,
): Promise<string | undefined> {
  if (!imageUrl) return;

  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    fileExtension(imageUrl)
  }`;

  try {
    streamResponse?.enqueue("downloading image");
    const res = await fetch(imageUrl);

    if (!res.ok) {
      console.log(`Failed to download remote image: ${imageUrl}`, res.status);
      // Release the unread body so the underlying connection is not left open.
      await res.body?.cancel();
      return;
    }

    // Only announce the save step once the download is known to have worked.
    streamResponse?.enqueue("saving image");
    const buffer = await res.arrayBuffer();
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    console.log(`Failed to save image: ${imageUrl}`, err);
    return;
  }
}

/**
 * Scrapes `fetchUrl`, converts it to markdown with Defuddle, enriches it with
 * metadata from `openai.extractArticleMetadata`, and stores the result as
 * `articles/<toUrlSafeString(title)>.md`.
 *
 * Progress messages are pushed to `streamResponse`; the last message is
 * `id: <url-safe title>`, matching the file name the article was stored under.
 *
 * Errors from scraping, parsing, the metadata call, or writing the article
 * propagate to the caller. Cover image failures do not (see
 * `fetchAndStoreCover`).
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
): Promise<void> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");
  const doc = await webScrape(fetchUrl, streamResponse);

  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });

  // Only log the first 200 characters of the body to keep log entries small.
  log.debug("downloaded and parse parsed", {
    ...result,
    url: fetchUrl,
    content: result.content.slice(0, 200),
  });

  streamResponse.enqueue("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");

  // NOTE(review): when neither source yields a title this is "", which makes
  // the stored file name depend on what toUrlSafeString("") returns — confirm.
  const title = result?.title || aiMeta?.headline || "";
  const articleId = toUrlSafeString(title);

  const coverImagePath = await fetchAndStoreCover(
    result.image,
    title,
    streamResponse,
  );

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    datePublished: formatDate(
      result?.published || aiMeta?.datePublished || undefined,
    ),
    image: coverImagePath,
    author: {
      _type: "Person",
      // Rewrites the first "@" of the author name to a "twitter:" prefix.
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    },
  };

  streamResponse.enqueue("writing to disk");

  log.debug("writing to disk", {
    ...newArticle,
    articleBody: newArticle.articleBody?.slice(0, 200),
  });

  await createResource(`articles/${articleId}.md`, newArticle);

  // Report the same identifier that was used for the file name.
  streamResponse.enqueue("id: " + articleId);
}

/**
 * Creates an article from a YouTube link: fetches the video details, shortens
 * the title via `openai.shortenTitle`, stores the thumbnail as cover image and
 * writes the article to `articles/<toUrlSafeString(shortened title)>.md`.
 *
 * Progress messages are pushed to `streamResponse`, ending with `finished`
 * followed by `id: <url-safe title>`.
 *
 * Errors from the video lookup, the title shortening, or writing the article
 * propagate to the caller.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
): Promise<void> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const youtubeId = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.enqueue("shortening title with openai");
  // Fall back to the original title when shortening yields nothing.
  const videoTitle = await openai.shortenTitle(video.snippet.title) ||
    video.snippet.title;
  const articleId = toUrlSafeString(videoTitle);

  // The max-resolution thumbnail is not available for every video, so the
  // lookup may be undefined; fetchAndStoreCover then simply skips the cover.
  const thumbnail = video?.snippet?.thumbnails?.maxres;
  const coverImagePath = await fetchAndStoreCover(
    thumbnail?.url,
    videoTitle,
    streamResponse,
  );

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    image: coverImagePath,
    url: fetchUrl,
    datePublished: formatDate(video.snippet.publishedAt),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");

  await createResource(
    `articles/${articleId}.md`,
    newArticle,
  );

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + articleId);
}

/**
 * `GET ?url=<link>`: starts importing the linked page (or YouTube video) as an
 * article and immediately returns a streaming response carrying the progress
 * messages.
 *
 * @throws AccessDeniedError when the request has no session.
 * @throws BadRequestError when `url` is missing or fails `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    const session = ctx.state.session;
    if (!session) {
      throw new AccessDeniedError();
    }

    const url = new URL(req.url);
    const fetchUrl = url.searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    const fromYoutube = isYoutubeLink(fetchUrl);
    const job = fromYoutube
      ? processCreateYoutubeVideo({ fetchUrl, streamResponse })
      : processCreateArticle({ fetchUrl, streamResponse });

    // The job runs in the background while the response streams; failures are
    // logged here and the stream is cancelled whether the job succeeded or not.
    job.then(() => {
      log.debug(
        fromYoutube
          ? "created article from youtube"
          : "created article from link",
        { url: fetchUrl },
      );
    }).catch((err) => {
      log.error(err);
    }).finally(() => {
      streamResponse.cancel();
    });

    return streamResponse.response;
  },
};