Files
memorium/routes/api/articles/create/index.ts

243 lines
6.6 KiB
TypeScript

import { Handlers } from "$fresh/server.ts";
import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
extractYoutubeId,
formatDate,
isYoutubeLink,
safeFileName,
toUrlSafeString,
} from "@lib/string.ts";
import { createLogger } from "@lib/log/index.ts";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
const log = createLogger("api/article");
async function getUnsplashCoverImage(
content: string,
streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
try {
streamResponse.info("creating unsplash search term");
const searchTerm = await openai.createUnsplashSearchTerm(content);
if (!searchTerm) return;
streamResponse.info(`searching for ${searchTerm}`);
const unsplashUrl = await unsplash.getImageBySearchTerm(searchTerm);
return unsplashUrl;
} catch (e) {
log.error("Failed to get unsplash cover image", e);
return undefined;
}
}
function ext(str: string) {
try {
const u = new URL(str);
if (u.searchParams.has("fm")) {
return u.searchParams.get("fm")!;
}
return fileExtension(u.pathname);
} catch (_e) {
return fileExtension(str);
}
}
async function fetchAndStoreCover(
imageUrl: string | undefined,
title: string,
streamResponse?: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
if (!imageUrl) return;
const imagePath = `articles/images/${safeFileName(title)}_cover.${
ext(imageUrl)
}`;
try {
streamResponse?.info("downloading image");
const res = await fetch(imageUrl);
streamResponse?.info("saving image");
if (!res.ok) {
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
return;
}
const buffer = await res.arrayBuffer();
await createResource(imagePath, buffer);
return `resources/${imagePath}`;
} catch (err) {
console.log(`Failed to save image: ${imageUrl}`, err);
return;
}
}
async function processCreateArticle(
{ fetchUrl, streamResponse }: {
fetchUrl: string;
streamResponse: ReturnType<typeof createStreamResponse>;
},
) {
log.info("create article from url", { url: fetchUrl });
streamResponse.info("downloading article");
const result = await webScrape(fetchUrl, streamResponse);
log.debug("downloaded and parse parsed", result);
streamResponse.info("parsed article, creating tags with openai");
const aiMeta = await openai.extractArticleMetadata(result.markdown);
streamResponse.info("postprocessing article");
const title = result?.title || aiMeta?.headline || "";
let coverImagePath: string | undefined = undefined;
if (result?.image?.length) {
log.debug("using local image for cover image", { image: result.image });
coverImagePath = await fetchAndStoreCover(
result.image,
title,
streamResponse,
);
} else {
const urlPath = await getUnsplashCoverImage(
result.markdown,
streamResponse,
);
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
log.debug("using unsplash for cover image", { image: coverImagePath });
}
const url = toUrlSafeString(title);
const newArticle: ArticleResource["content"] = {
_type: "Article",
headline: title,
articleBody: result.markdown,
url: fetchUrl,
datePublished: formatDate(
result?.published || aiMeta?.datePublished || undefined,
),
image: coverImagePath,
author: {
_type: "Person",
name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
.replace(
"@",
"twitter:",
),
},
} as const;
streamResponse.info("writing to disk");
log.debug("writing to disk", {
...newArticle,
articleBody: newArticle.articleBody?.slice(0, 200),
});
await createResource(`articles/${url}.md`, newArticle);
streamResponse.send({ type: "finished", url });
}
async function processCreateYoutubeVideo(
{ fetchUrl, streamResponse }: {
fetchUrl: string;
streamResponse: ReturnType<typeof createStreamResponse>;
},
) {
log.info("create youtube article from url", {
url: fetchUrl,
});
streamResponse.info("getting video infos from youtube api");
const youtubeId = extractYoutubeId(fetchUrl);
const video = await getYoutubeVideoDetails(youtubeId);
streamResponse.info("shortening title with openai");
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
video.snippet.title;
const thumbnail = video?.snippet?.thumbnails?.maxres;
const coverImagePath = await fetchAndStoreCover(
thumbnail.url,
videoTitle || video.snippet.title,
streamResponse,
);
const newArticle: ArticleResource["content"] = {
_type: "Article",
headline: video.snippet.title,
articleBody: video.snippet.description,
image: coverImagePath,
url: fetchUrl,
datePublished: formatDate(video.snippet.publishedAt),
author: {
_type: "Person",
name: video.snippet.channelTitle,
},
};
streamResponse.info("creating article");
const filename = toUrlSafeString(videoTitle);
await createResource(
`articles/${filename}.md`,
newArticle,
);
streamResponse.info("finished");
streamResponse.send({ type: "finished", url: filename });
}
export const handler: Handlers = {
GET(req, ctx) {
const session = ctx.state.session;
if (!session) {
throw new AccessDeniedError();
}
const url = new URL(req.url);
const fetchUrl = url.searchParams.get("url");
if (!fetchUrl || !isValidUrl(fetchUrl)) {
throw new BadRequestError();
}
const streamResponse = createStreamResponse();
if (isYoutubeLink(fetchUrl)) {
processCreateYoutubeVideo({ fetchUrl, streamResponse }).then(
(article) => {
log.debug("created article from youtube", { article });
},
).catch((err) => {
log.error(err);
}).finally(() => {
streamResponse.cancel();
});
} else {
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
log.debug("created article from link", { article });
}).catch((err) => {
log.error(err);
}).finally(() => {
streamResponse.cancel();
});
}
return streamResponse.response;
},
};