// 204 lines
// 5.4 KiB
// TypeScript
// Raw Normal View History
//
// 2023-08-01 21:35:21 +02:00
import { Handlers } from "$fresh/server.ts";
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
import { BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl, json } from "@lib/helpers.ts";
// 2023-08-01 21:35:21 +02:00
import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
import { Article, createArticle } from "@lib/resource/articles.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";
// 2023-08-01 21:35:21 +02:00
// Shared deno-dom parser. processCreateArticle uses it twice: once for the
// downloaded HTML and once for the cleaned-up markup returned by Readability.
const parser = new DOMParser();
// The GFM turndown plugin is currently disabled (its import above is
// commented out as well).
//service.use(gfm);
/**
 * Downloads the page at `fetchUrl`, extracts its readable content, converts
 * it to markdown and stores the result as a new article.
 *
 * Progress messages are pushed to `streamResponse` while the work is done;
 * the last message has the form `id: <article id>`.
 *
 * @param fetchUrl - Absolute URL of the page to import.
 * @param streamResponse - Stream used to report progress to the client.
 * @throws Error when the page cannot be downloaded or Readability finds no
 *   article content in it.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");
  const response = await fetch(fetchUrl);
  if (!response.ok) {
    // Do not turn a 404/500 error page into an article.
    throw new Error(
      `failed to download ${fetchUrl}: ${response.status} ${response.statusText}`,
    );
  }
  const html = await response.text();
  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");
  const title = document?.querySelector("title")?.innerText;
  const metaAuthor =
    document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document?.querySelector('meta[name="author"]')?.getAttribute("content");

  // Readability returns null when it cannot find an article in the page.
  const result = new Readability(document).parse();
  if (!result) {
    throw new Error(`no readable content found at ${fetchUrl}`);
  }
  console.log("[api/article] parsed ", {
    url: fetchUrl,
    content: result.textContent,
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  const service = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  const pageUrl = new URL(fetchUrl);

  // Resolves a possibly relative reference ("/a", "//cdn/a", "a/b", "#frag")
  // against the page URL. Falls back to the raw value if it cannot be parsed
  // so a single malformed link never aborts the whole import.
  const toAbsolute = (reference: string): string => {
    try {
      return new URL(reference, pageUrl).href;
    } catch {
      return reference;
    }
  };

  service.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: Element) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      // Inline data URIs would bloat the markdown, so they are dropped.
      if (!src || src.startsWith("data:image")) return "";

      return `![${alt}](${toAbsolute(src)})`;
    },
  });

  service.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: Element) {
      const href = node.getAttribute("href");
      // Anchors without a target are rendered as plain text.
      if (!href) return content;

      return `[${content}](${toAbsolute(href)})`;
    },
  });

  const markdown = service.turndown(cleanDocument);

  streamResponse.enqueue("parsed article, creating tags with openai");
  // The three lookups are independent, so they run in parallel. The author is
  // only extracted via openai when the page metadata did not provide one.
  const [tags, shortTitle, author] = await Promise.all([
    openai.createTags(markdown),
    title ? openai.shortenTitle(title) : undefined,
    metaAuthor || openai.extractAuthorName(markdown),
  ]);

  const id = shortTitle || title || "";
  const newArticle = {
    id,
    name: title || "",
    content: markdown,
    tags: tags || [],
    meta: {
      // A leading "@" handle is stored with a "twitter:" prefix instead.
      author: (author || "").replace("@", "twitter:"),
      link: fetchUrl,
      status: "not-finished",
      date: new Date(),
    },
  } as const;

  streamResponse.enqueue("finished processing");
  await createArticle(newArticle);
  streamResponse.enqueue("id: " + newArticle.id);
}
/**
 * Creates an article from a YouTube video: fetches the video details, asks
 * openai for a shortened title to use as the id and stores the result.
 *
 * Progress messages are pushed to `streamResponse` while the work is done;
 * the last message has the form `id: <article id>`.
 *
 * @param fetchUrl - URL of the YouTube video to import.
 * @param streamResponse - Stream used to report progress to the client.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");
  const id = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(id);
  const snippet = video.snippet;

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(snippet.title);

  const newArticle: Article = {
    name: snippet.title,
    // Fall back to the full title when no shortened one is available.
    id: newId || snippet.title,
    content: snippet.description,
    // The YouTube API omits `tags` for videos without any, so default to an
    // empty list instead of crashing on `.slice`. At most five tags are kept.
    tags: (snippet.tags ?? []).slice(0, 5),
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: snippet.channelTitle,
      date: new Date(snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");
  await createArticle(newArticle);

  streamResponse.enqueue("finished");
  streamResponse.enqueue("id: " + newArticle.id);
}
/**
 * Route handler: `GET ?url=<page or youtube url>`.
 *
 * Validates the `url` query parameter, starts the import in the background
 * and immediately returns a streaming response that carries the progress
 * messages emitted by the import.
 *
 * @throws BadRequestError when `url` is missing or not a valid URL.
 */
export const handler: Handlers = {
  GET(req) {
    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // YouTube links go through the YouTube API, everything else is scraped.
    const processUrl = isYoutubeLink(fetchUrl)
      ? processCreateYoutubeVideo
      : processCreateArticle;

    // Deliberately not awaited: the response must be returned right away so
    // the client can read progress while the import is still running.
    void processUrl({ fetchUrl, streamResponse })
      .then((article) => {
        console.log({ article });
      })
      .catch((err: unknown) => {
        console.error("[api/article] failed to create article", {
          url: fetchUrl,
          err,
        });
      })
      .finally(() => {
        // Always end the stream, on success as well as on failure.
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};