import { Handlers } from "$fresh/server.ts";
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
import { BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl, json } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
import { Article, createArticle } from "@lib/resource/articles.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";

const parser = new DOMParser();

//service.use(gfm);

/** Progress stream handed to the processing functions by the route handler. */
type StreamResponse = ReturnType<typeof createStreamResponse>;

/** Arguments shared by both article-creation pipelines. */
interface ProcessOptions {
  fetchUrl: string;
  streamResponse: StreamResponse;
}

/**
 * Resolves a link or image target found in the article against the page URL.
 *
 * Handles root-relative (`/a`), protocol-relative (`//host/a`), page-relative
 * (`a/b.png`) and fragment-only (`#id`) targets in one place. Targets the URL
 * parser rejects are returned unchanged so the markdown still contains them.
 */
function resolveLink(target: string, base: URL): string {
  try {
    return new URL(target, base).href;
  } catch {
    return target;
  }
}

/**
 * Downloads a web page, extracts its readable content, converts it to
 * markdown, enriches it through the openai helpers (tags, short title, author)
 * and stores it with `createArticle`. Progress messages are pushed to
 * `streamResponse`; the final message is `"id: <article id>"`.
 *
 * @throws Error when the download returns a non-2xx status, when the HTML
 *   cannot be parsed, or when Readability finds no article content.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: ProcessOptions,
): Promise<void> {
  console.log("[api/article] create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");

  const response = await fetch(fetchUrl);
  if (!response.ok) {
    // Without this check an error page would be parsed and stored as an article.
    throw new Error(
      `failed to download ${fetchUrl}: ${response.status} ${response.statusText}`,
    );
  }
  const html = await response.text();

  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");
  if (!document) {
    throw new Error(`could not parse html from ${fetchUrl}`);
  }

  const title = document.querySelector("title")?.innerText;

  // Prefer the twitter handle; it is rewritten to "twitter:<handle>" below.
  const metaAuthor =
    document.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document.querySelector('meta[name="author"]')?.getAttribute("content");

  const result = new Readability(document).parse();
  if (!result) {
    // parse() returns null when no article content could be extracted.
    throw new Error(`no readable content found at ${fetchUrl}`);
  }

  console.log("[api/article] parsed ", {
    url: fetchUrl,
    content: result.textContent,
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  const service = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  const url = new URL(fetchUrl);

  service.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: HTMLImageElement) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      // Inline data images would bloat the markdown, so they are dropped.
      if (!src || src.startsWith("data:image")) return "";
      return `![${alt}](${resolveLink(src, url)})`;
    },
  });

  service.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: HTMLAnchorElement) {
      const href = node.getAttribute("href");
      if (!href) return content;
      return `[${content}](${resolveLink(href, url)})`;
    },
  });

  const markdown = service.turndown(cleanDocument);

  streamResponse.enqueue("parsed article, creating tags with openai");

  // The author lookup only hits openai when the page has no author meta tag.
  const [tags, shortTitle, author] = await Promise.all([
    openai.createTags(markdown),
    title && openai.shortenTitle(title),
    metaAuthor || openai.extractAuthorName(markdown),
  ]);

  const newArticle: Article = {
    id: shortTitle || title || "",
    name: title || "",
    content: markdown,
    tags: tags || [],
    meta: {
      author: (author || "").replace("@", "twitter:"),
      link: fetchUrl,
      status: "not-finished",
      date: new Date(),
    },
  };

  streamResponse.enqueue("finished processing");

  await createArticle(newArticle);

  streamResponse.enqueue("id: " + newArticle.id);
}

/**
 * Creates an article from a YouTube link: looks the video up with
 * `getYoutubeVideoDetails`, shortens its title through openai to build the id,
 * and stores it with `createArticle`. Progress messages are pushed to
 * `streamResponse`; the final message is `"id: <article id>"`.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: ProcessOptions,
): Promise<void> {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const id = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(id);

  streamResponse.enqueue("shortening title with openai");

  const newId = await openai.shortenTitle(video.snippet.title);

  const newArticle: Article = {
    name: video.snippet.title,
    id: newId || video.snippet.title,
    content: video.snippet.description,
    // NOTE(review): guards against `tags` being absent from the API response
    // for videos without keywords — confirm against getYoutubeVideoDetails.
    tags: video.snippet.tags?.slice(0, 5) ?? [],
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: video.snippet.channelTitle,
      date: new Date(video.snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");

  await createArticle(newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + newArticle.id);
}

/**
 * `GET ?url=<link>`: starts article creation for the given link and returns
 * the progress stream immediately. Processing continues in the background and
 * the stream is cancelled once it settles, whether it succeeded or failed.
 *
 * @throws BadRequestError when `url` is missing or rejected by `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req) {
    const fetchUrl = new URL(req.url).searchParams.get("url");

    // NOTE(review): the server fetches this caller-supplied URL; confirm that
    // isValidUrl rejects internal/private addresses (SSRF).
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    const processUrl = isYoutubeLink(fetchUrl)
      ? processCreateYoutubeVideo
      : processCreateArticle;

    // Fire-and-forget on purpose: the response must be returned before the
    // work finishes so the client can read progress messages as they arrive.
    void processUrl({ fetchUrl, streamResponse })
      .then((article) => {
        console.log({ article });
      })
      .catch((err) => {
        console.log(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};