Files
memorium/routes/api/articles/create/index.ts
2025-11-05 00:42:53 +01:00

206 lines
5.5 KiB
TypeScript

import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
extractYoutubeId,
formatDate,
isYoutubeLink,
safeFileName,
toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
// Module-level logger, created with the "api/article" label.
const log = createLogger("api/article");
/**
 * Downloads a remote cover image and stores it as a resource.
 *
 * This is best-effort: a missing URL, a non-OK HTTP response, or any error
 * while downloading/storing is logged and yields `undefined`, so the caller can
 * continue without a cover image.
 *
 * @param imageUrl - Remote image location; nothing happens when it is empty.
 * @param title - Title used to derive the stored file name.
 * @param streamResponse - Optional progress stream that receives status messages.
 * @returns The `resources/...` path of the stored image, or `undefined` when
 *   no image was stored.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse?: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return;

  try {
    // Derived inside the try block so that a failure while building the path
    // is treated like any other cover failure instead of aborting the caller.
    const imagePath = `articles/images/${safeFileName(title)}_cover.${
      fileExtension(imageUrl)
    }`;

    streamResponse?.enqueue("downloading image");
    const res = await fetch(imageUrl);
    if (!res.ok) {
      console.log(`Failed to download remote image: ${imageUrl}`, res.status);
      // Release the unread response body so the underlying resource is freed.
      await res.body?.cancel();
      return;
    }

    // Only announce the save step once there is actually something to save.
    streamResponse?.enqueue("saving image");
    const buffer = await res.arrayBuffer();
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    console.log(`Failed to save image: ${imageUrl}`, err);
    return;
  }
}
/**
 * Creates an article resource from an arbitrary web page.
 *
 * Steps: scrape the page, run Defuddle over it (markdown output requested),
 * ask OpenAI for additional metadata, store the cover image (best-effort) and
 * finally write the article to `articles/<id>.md`.
 *
 * Progress messages are pushed to `streamResponse`; the last message is
 * `id: <id>`, where `<id>` is the URL-safe name the article was stored under.
 *
 * @param fetchUrl - URL of the page to turn into an article.
 * @param streamResponse - Stream used to report progress to the client.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");
  const doc = await webScrape(fetchUrl, streamResponse);
  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });

  // Only log a short preview of the content to keep log entries small.
  log.debug("downloaded and parse parsed", {
    ...result,
    url: fetchUrl,
    content: result.content.slice(0, 200),
  });

  streamResponse.enqueue("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");

  // Prefer the title extracted from the page, then the AI-provided headline.
  const title = result.title || aiMeta?.headline || "";
  // Single source of truth for the stored file name and the id reported back.
  const articleId = toUrlSafeString(title);

  const coverImagePath = await fetchAndStoreCover(
    result.image,
    title,
    streamResponse,
  );

  const authorName = result.schemaOrgData?.author?.name || aiMeta?.author ||
    "";

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    datePublished: formatDate(
      result.published || aiMeta?.datePublished || undefined,
    ),
    image: coverImagePath,
    author: {
      _type: "Person",
      // Replaces the first "@" in the author name with "twitter:".
      name: authorName.replace("@", "twitter:"),
    },
  };

  streamResponse.enqueue("writing to disk");
  log.debug("writing to disk", {
    ...newArticle,
    articleBody: newArticle.articleBody?.slice(0, 200),
  });

  await createResource(`articles/${articleId}.md`, newArticle);

  // Report the id the article was actually stored under (not the raw title),
  // matching what the YouTube variant reports.
  streamResponse.enqueue("id: " + articleId);
}
/**
 * Creates an article resource from a YouTube video link.
 *
 * Fetches the video details, asks OpenAI for a shortened title (falling back
 * to the original title), stores the `maxres` thumbnail as cover when one is
 * available, and writes the article to `articles/<id>.md`.
 *
 * Progress messages are pushed to `streamResponse`; the last message is
 * `id: <id>`, where `<id>` is the URL-safe form of the (shortened) title.
 *
 * @param fetchUrl - URL of the YouTube video.
 * @param streamResponse - Stream used to report progress to the client.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");
  const youtubeId = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.enqueue("shortening title with openai");
  const videoTitle = await openai.shortenTitle(video.snippet.title) ||
    video.snippet.title;
  // Computed once so the stored file name and the reported id cannot diverge.
  const articleId = toUrlSafeString(videoTitle);

  // A `maxres` thumbnail may be missing; passing `undefined` lets
  // fetchAndStoreCover skip the download instead of throwing a TypeError here.
  const thumbnailUrl = video.snippet.thumbnails?.maxres?.url;
  const coverImagePath = await fetchAndStoreCover(
    thumbnailUrl,
    videoTitle,
    streamResponse,
  );

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    image: coverImagePath,
    url: fetchUrl,
    datePublished: formatDate(video.snippet.publishedAt),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");
  await createResource(
    `articles/${articleId}.md`,
    newArticle,
  );

  streamResponse.enqueue("finished");
  streamResponse.enqueue("id: " + articleId);
}
/**
 * Route handlers for creating an article from a URL.
 *
 * `GET` expects a `url` query parameter. It throws `AccessDeniedError` when the
 * request has no session and `BadRequestError` when the parameter is missing
 * or fails `isValidUrl`. The creation job is started without being awaited:
 * the stream response is returned immediately, job errors are logged, and
 * `streamResponse.cancel()` is called once the job settles.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    if (!ctx.state.session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Pick the matching processor; both share the same completion handling.
    const fromYoutube = isYoutubeLink(fetchUrl);
    const job = fromYoutube
      ? processCreateYoutubeVideo({ fetchUrl, streamResponse })
      : processCreateArticle({ fetchUrl, streamResponse });

    job
      .then((article) => {
        if (fromYoutube) {
          log.debug("created article from youtube", { article });
        } else {
          log.debug("created article from link", { article });
        }
      })
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};