import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
  extractYoutubeId,
  formatDate,
  isYoutubeLink,
  safeFileName,
  toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";

const log = createLogger("api/article");

/** Progress stream passed to the workers below; whatever `createStreamResponse` returns. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/**
 * Scrapes `fetchUrl`, extracts the article with Defuddle, asks OpenAI for
 * metadata, optionally stores the cover image, and writes the result as an
 * `Article` resource. Progress messages are pushed to `streamResponse` at
 * every step.
 *
 * @param fetchUrl - URL of the page to import.
 * @param streamResponse - Stream that receives human-readable progress lines.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
): Promise<void> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");
  const doc = await webScrape(fetchUrl, streamResponse);
  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });

  log.debug("downloaded and parse parsed", {
    url: fetchUrl,
    content: result.content,
  });

  streamResponse.enqueue("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");

  // `||` (not `??`) on purpose: an empty extracted title falls through to the
  // AI-provided headline.
  const title = result.title || aiMeta?.headline || "";

  // Defaults to the remote image URL; replaced by the local resource path only
  // when the download and save below both succeed.
  let finalPath = result.image;
  if (result.image) {
    const extension = fileExtension(result.image);
    const imagePath = `resources/articles/images/${
      safeFileName(title)
    }_cover.${extension}`;
    try {
      streamResponse.enqueue("downloading image");
      const res = await fetch(result.image);
      if (!res.ok) {
        // Do not persist an HTTP error page as the cover image. Release the
        // unread body, then bail out to the best-effort handler below.
        await res.body?.cancel();
        throw new Error(`image request failed with status ${res.status}`);
      }
      streamResponse.enqueue("saving image");
      const buffer = await res.arrayBuffer();
      await createResource(imagePath, buffer);
      finalPath = imagePath;
    } catch (err) {
      // Best effort: the article is still created, pointing at the remote image.
      console.log("Failed to save image", err);
    }
  }

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    datePublished: formatDate(
      result.published || aiMeta?.datePublished || undefined,
    ),
    image: finalPath,
    author: {
      _type: "Person",
      // Only the first "@" is rewritten (string-pattern `replace`).
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    },
  };

  streamResponse.enqueue("writing to disk");
  await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);

  // NOTE(review): the file is stored under `toUrlSafeString(title)` but the raw
  // title is reported as the id; confirm the client normalises it itself.
  streamResponse.enqueue("id: " + title);
}

/**
 * Creates an `Article` resource from a YouTube link using the video's
 * snippet (title, description, publish date, channel title). The resource id
 * is the OpenAI-shortened title, falling back to the YouTube video id when
 * shortening yields nothing.
 *
 * @param fetchUrl - URL of the YouTube video.
 * @param streamResponse - Stream that receives human-readable progress lines.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: StreamResponse;
  },
): Promise<void> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");
  const youtubeId = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(video.snippet.title);
  const id = newId || youtubeId;

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    url: fetchUrl,
    datePublished: new Date(video.snippet.publishedAt).toISOString(),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");
  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("finished");
  streamResponse.enqueue("id: " + id);
}

/**
 * `GET ?url=<link>`: imports the linked page (or YouTube video) as an article.
 *
 * The import runs in the background; the streaming response is returned
 * immediately and receives the workers' progress messages.
 *
 * @throws AccessDeniedError when the request has no session.
 * @throws BadRequestError when `url` is missing or fails `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    const session = ctx.state.session;
    if (!session) {
      throw new AccessDeniedError();
    }

    const url = new URL(req.url);
    const fetchUrl = url.searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    const fromYoutube = isYoutubeLink(fetchUrl);
    const job = fromYoutube
      ? processCreateYoutubeVideo({ fetchUrl, streamResponse })
      : processCreateArticle({ fetchUrl, streamResponse });

    // Fire-and-forget: failures are logged, and the stream is always closed so
    // the client is never left hanging.
    void job
      .then((article) => {
        // Both workers resolve without a value, so `article` is undefined here.
        log.debug(
          fromYoutube
            ? "created article from youtube"
            : "created article from link",
          { article },
        );
      })
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};