import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import { Article } from "@lib/resource/articles.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
  extractYoutubeId,
  isYoutubeLink,
  toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka.ts";
import { webScrape } from "@lib/webScraper.ts";

const log = createLogger("api/article");

/** Progress stream handed to the workers; produced by `createStreamResponse()`. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/** Arguments shared by both article-creation workers. */
interface ProcessArgs {
  /** URL of the page or video to import. */
  fetchUrl: string;
  /** Stream that receives human-readable progress messages. */
  streamResponse: StreamResponse;
}

/**
 * Downloads a web page, extracts its content as markdown via Defuddle,
 * enriches it with metadata from OpenAI and stores it as
 * `articles/<id>.md`.
 *
 * Progress messages are pushed to `streamResponse`; the last message has the
 * form `id: <id>`.
 *
 * @returns The article that was written.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: ProcessArgs,
): Promise<Article> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");

  const doc = await webScrape(fetchUrl, streamResponse);

  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });

  log.debug("downloaded and parse parsed", {
    url: fetchUrl,
    content: result.content,
  });

  streamResponse.enqueue("parsed article, creating tags with openai");

  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");

  // `||` (not `??`) on purpose: an empty extracted title must fall through
  // to the AI-provided headline.
  const title = result.title || aiMeta?.headline || "";

  // Fall back to a host + timestamp slug so that a page without any usable
  // title is not written to the nameless file `articles/.md`.
  // NOTE(review): assumes toUrlSafeString yields "" when nothing usable
  // remains of its input - confirm against @lib/string.ts.
  const id = toUrlSafeString(title) ||
    toUrlSafeString(`${new URL(fetchUrl).hostname}-${Date.now()}`);

  const newArticle: Article = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    datePublished: result.published || aiMeta?.datePublished ||
      new Date().toISOString(),
    image: result.image,
    author: {
      _type: "Person",
      // Only the first "@" is rewritten (string pattern, not a global regex).
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    },
  };

  streamResponse.enqueue("writing to disk");

  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("id: " + id);

  return newArticle;
}

/**
 * Creates an article from a YouTube link using the video's title,
 * description, channel and publish date, and stores it as
 * `articles/<id>.md`.
 *
 * The id is the OpenAI-shortened title, or the YouTube video id when no
 * shortened title is returned. Progress messages are pushed to
 * `streamResponse`; the last message has the form `id: <id>`.
 *
 * @returns The article that was written.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: ProcessArgs,
): Promise<Article> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const youtubeId = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.enqueue("shortening title with openai");

  const newId = await openai.shortenTitle(video.snippet.title);

  const id = newId || youtubeId;

  const newArticle: Article = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    url: fetchUrl,
    datePublished: new Date(video.snippet.publishedAt).toISOString(),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");

  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + id);

  return newArticle;
}

/**
 * `GET ?url=<url>`: imports the given URL as an article and streams progress
 * messages back to the client while the import runs in the background.
 *
 * @throws AccessDeniedError when the request has no session.
 * @throws BadRequestError when `url` is missing or fails `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    const session = ctx.state.session;
    if (!session) {
      throw new AccessDeniedError();
    }

    const url = new URL(req.url);
    const fetchUrl = url.searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    const fromYoutube = isYoutubeLink(fetchUrl);
    const createArticle = fromYoutube
      ? processCreateYoutubeVideo
      : processCreateArticle;
    const successMessage = fromYoutube
      ? "created article from youtube"
      : "created article from link";

    // Fire-and-forget: the response is returned immediately and the worker
    // reports its progress through the stream.
    // NOTE(review): failures are only logged server-side; the client just
    // sees the stream end without an `id:` message.
    void createArticle({ fetchUrl, streamResponse })
      .then((article) => {
        log.debug(successMessage, { article });
      })
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};