Files
memorium/routes/api/articles/create/index.ts
2025-11-03 00:03:27 +01:00

155 lines
4.1 KiB
TypeScript

import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
extractYoutubeId,
isYoutubeLink,
toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
// Module-level logger shared by every function in this route, created with the "api/article" label.
const log = createLogger("api/article");
/**
 * Imports a web page as an article resource.
 *
 * Steps: scrape the page via `webScrape`, run it through `Defuddle` with the
 * `markdown: true` option, ask `openai.extractArticleMetadata` for metadata,
 * then write the result with `createResource` to `articles/<id>.md`.
 * Progress messages are pushed to `streamResponse` along the way; the final
 * message is `"id: <id>"`.
 *
 * @param options.fetchUrl - URL of the page to import.
 * @param options.streamResponse - Stream used to report progress.
 * @returns The article content that was handed to `createResource`, so callers
 *   can log or inspect what was stored.
 * @throws Error when neither the parsed page nor the AI metadata yields a title
 *   from which a non-empty id can be derived.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");
  const doc = await webScrape(fetchUrl, streamResponse);
  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });
  log.debug("downloaded and parse parsed", {
    url: fetchUrl,
    content: result.content,
  });

  streamResponse.enqueue("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");
  // Prefer what the page itself declares; fall back to the AI-extracted value.
  // `||` (not `??`) is intentional so empty strings also fall through.
  const title = result.title || aiMeta?.headline || "";
  const id = toUrlSafeString(title);
  if (!id) {
    // Without this guard an untitled page would be written to "articles/.md".
    throw new Error(`could not derive an article id for ${fetchUrl}`);
  }

  const authorName = result.schemaOrgData?.author?.name || aiMeta?.author || "";

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    datePublished: result.published || aiMeta?.datePublished ||
      new Date().toISOString(),
    image: result.image,
    author: {
      _type: "Person",
      // A string pattern makes `replace` rewrite only the first "@".
      name: authorName.replace("@", "twitter:"),
    },
  } as const;

  streamResponse.enqueue("writing to disk");
  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("id: " + id);
  return newArticle;
}
/**
 * Imports a YouTube video as an article resource.
 *
 * Steps: extract the video id from the URL, fetch the video details with
 * `getYoutubeVideoDetails`, ask `openai.shortenTitle` for a short title to use
 * as the resource id (falling back to the video id), then write the result
 * with `createResource` to `articles/<id>.md`. Progress messages are pushed to
 * `streamResponse`; the last two are `"finished"` and `"id: <id>"`.
 *
 * @param options.fetchUrl - URL of the YouTube video to import.
 * @param options.streamResponse - Stream used to report progress.
 * @returns The article content that was handed to `createResource`, so callers
 *   can log or inspect what was stored.
 * @throws Error when no video id can be extracted from `fetchUrl`.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");
  const youtubeId = extractYoutubeId(fetchUrl);
  if (!youtubeId) {
    // Fail early with a clear message instead of querying the API with an empty id.
    throw new Error(`could not extract a youtube id from ${fetchUrl}`);
  }
  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(video.snippet.title);
  // Use the video id when no shortened title comes back.
  const id = newId || youtubeId;

  // `toISOString()` throws a RangeError on an invalid date; fall back to the
  // current time, as the article importer does when no date is known.
  const publishedAt = new Date(video.snippet.publishedAt);
  const datePublished = Number.isNaN(publishedAt.getTime())
    ? new Date().toISOString()
    : publishedAt.toISOString();

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    url: fetchUrl,
    datePublished,
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");
  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("finished");
  streamResponse.enqueue("id: " + id);
  return newArticle;
}
/**
 * Route handlers for the article creation endpoint.
 *
 * `GET` expects a `url` query parameter. It rejects requests without a session
 * (`AccessDeniedError`) or without a valid URL (`BadRequestError`), then starts
 * the matching import job without awaiting it and returns the stream response
 * right away. Job failures are reported through `log.error`; the stream is
 * cancelled once the job settles, whether it succeeded or failed.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    if (!ctx.state.session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Choose the import job and its success log message by link type.
    const fromYoutube = isYoutubeLink(fetchUrl);
    const job = fromYoutube
      ? processCreateYoutubeVideo({ fetchUrl, streamResponse })
      : processCreateArticle({ fetchUrl, streamResponse });
    const successMessage = fromYoutube
      ? "created article from youtube"
      : "created article from link";

    // Not awaited: the job keeps running after the response has been returned.
    job
      .then((article) => {
        log.debug(successMessage, { article });
      })
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};