243 lines
6.6 KiB
TypeScript
243 lines
6.6 KiB
TypeScript
import { Handlers } from "$fresh/server.ts";
|
|
import { Defuddle } from "defuddle/node";
|
|
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
|
|
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
|
|
import * as openai from "@lib/openai.ts";
|
|
import * as unsplash from "@lib/unsplash.ts";
|
|
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
|
import {
|
|
extractYoutubeId,
|
|
formatDate,
|
|
isYoutubeLink,
|
|
safeFileName,
|
|
toUrlSafeString,
|
|
} from "@lib/string.ts";
|
|
import { createLogger } from "@lib/log/index.ts";
|
|
import { createResource } from "@lib/marka/index.ts";
|
|
import { webScrape } from "@lib/webScraper.ts";
|
|
import { ArticleResource } from "@lib/marka/schema.ts";
|
|
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
|
|
|
|
// Module-level logger shared by every function in this route; created with
// the name "api/article".
const log = createLogger("api/article");
|
|
|
|
/**
 * Looks up a cover image on Unsplash for the given article content.
 *
 * A search term is first requested from `openai.createUnsplashSearchTerm`;
 * when one is produced it is passed to `unsplash.getImageBySearchTerm`.
 * Progress is reported through `streamResponse.info`.
 *
 * This is best-effort: any error is logged and swallowed.
 *
 * @param content Article text handed to the search-term generator.
 * @param streamResponse Stream used to report progress messages.
 * @returns The value resolved by `unsplash.getImageBySearchTerm`, or
 *   `undefined` when no search term was produced or any step threw.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);

    if (query) {
      streamResponse.info(`searching for ${query}`);
      // Awaited inside the try so a rejected lookup lands in the catch below.
      return await unsplash.getImageBySearchTerm(query);
    }

    return undefined;
  } catch (e) {
    log.error("Failed to get unsplash cover image", e);
    return undefined;
  }
}
|
|
|
|
/**
 * Derives a file extension for an image reference.
 *
 * When `str` parses as a URL and carries an `fm` query parameter, that
 * parameter's value is returned as-is. Otherwise the extension is taken from
 * the URL's pathname via `fileExtension`. If anything inside the `try` throws
 * (for example `str` is not a valid URL), `fileExtension` is applied to the
 * raw string instead.
 *
 * @param str URL or path-like string to inspect.
 * @returns The `fm` parameter value, or whatever `fileExtension` returns.
 */
function ext(str: string) {
  try {
    const { searchParams, pathname } = new URL(str);
    // `get` returns null exactly when the parameter is absent, so a single
    // lookup covers both the presence check and the read.
    const format = searchParams.get("fm");
    return format !== null ? format : fileExtension(pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
|
|
|
|
/**
 * Downloads a remote image and stores it as the cover of an article.
 *
 * The image is written through `createResource` to
 * `articles/images/<safeFileName(title)>_cover.<ext(imageUrl)>`.
 *
 * This is best-effort: download and storage failures are logged through the
 * module logger and reported to the caller as `undefined` rather than thrown.
 *
 * @param imageUrl Remote image location; when falsy nothing is done.
 * @param title Article title used to build the stored file name.
 * @param streamResponse Optional stream used to report progress messages.
 * @returns `resources/<imagePath>` on success, otherwise `undefined`.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse?: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return undefined;

  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;

  try {
    streamResponse?.info("downloading image");
    const res = await fetch(imageUrl);

    if (!res.ok) {
      // The body is not going to be read; cancel it so the runtime can
      // release the underlying resource instead of leaving it dangling.
      await res.body?.cancel();
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      return undefined;
    }

    // Only announce the save once we know there is something to save.
    streamResponse?.info("saving image");
    const buffer = await res.arrayBuffer();
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    log.error(`Failed to save image: ${imageUrl}`, err);
    return undefined;
  }
}
|
|
|
|
/**
 * Creates an article resource from an arbitrary web page.
 *
 * Steps, each reported through `streamResponse.info`:
 *  1. scrape the page with `webScrape`,
 *  2. extract metadata from the scraped markdown with
 *     `openai.extractArticleMetadata`,
 *  3. store a cover image: the scraped page image when one is present,
 *     otherwise an Unsplash result,
 *  4. write the article to `articles/<toUrlSafeString(title)>.md`,
 *  5. send a `{ type: "finished", url }` message on the stream.
 *
 * Errors from scraping, metadata extraction or writing are not caught here
 * and reject the returned promise.
 *
 * @param fetchUrl URL of the page to import.
 * @param streamResponse Stream used for progress and the final message.
 * @returns The article content that was written, so callers can log or
 *   inspect it.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.info("downloading article");
  const result = await webScrape(fetchUrl, streamResponse);
  log.debug("downloaded and parsed article", result);

  streamResponse.info("parsed article, creating tags with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  streamResponse.info("postprocessing article");

  // `||` is intentional: an empty scraped title should fall through to the
  // AI-provided headline.
  const title = result.title || aiMeta?.headline || "";

  let coverImagePath: string | undefined = undefined;
  if (result.image?.length) {
    log.debug("using local image for cover image", { image: result.image });
    coverImagePath = await fetchAndStoreCover(
      result.image,
      title,
      streamResponse,
    );
  } else {
    const urlPath = await getUnsplashCoverImage(
      result.markdown,
      streamResponse,
    );
    coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
    log.debug("using unsplash for cover image", { image: coverImagePath });
  }

  const url = toUrlSafeString(title);

  // Only the first "@" is rewritten (string-pattern `replace`).
  const authorName =
    (result.schemaOrgData?.author?.name || aiMeta?.author || "").replace(
      "@",
      "twitter:",
    );

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.markdown,
    url: fetchUrl,
    datePublished: formatDate(
      result.published || aiMeta?.datePublished || undefined,
    ),
    image: coverImagePath,
    author: {
      _type: "Person",
      name: authorName,
    },
  };

  streamResponse.info("writing to disk");

  // Truncate the body in the log entry to keep log lines short.
  log.debug("writing to disk", {
    ...newArticle,
    articleBody: newArticle.articleBody?.slice(0, 200),
  });

  await createResource(`articles/${url}.md`, newArticle);

  streamResponse.send({ type: "finished", url });

  return newArticle;
}
|
|
|
|
/**
 * Creates an article resource from a YouTube video link.
 *
 * Video details are fetched with `getYoutubeVideoDetails`, the title is
 * shortened with `openai.shortenTitle` (falling back to the original title),
 * the thumbnail is stored as the cover image, and the article is written to
 * `articles/<toUrlSafeString(shortened title)>.md`. Progress is reported
 * through `streamResponse.info`, followed by a
 * `{ type: "finished", url: filename }` message.
 *
 * Errors from the YouTube lookup, title shortening or writing are not caught
 * here and reject the returned promise.
 *
 * @param fetchUrl YouTube URL to import.
 * @param streamResponse Stream used for progress and the final message.
 * @returns The article content that was written, so callers can log or
 *   inspect it.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.info("getting video infos from youtube api");
  const youtubeId = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(youtubeId);

  streamResponse.info("shortening title with openai");
  const videoTitle = await openai.shortenTitle(video.snippet.title) ||
    video.snippet.title;

  // Not every video exposes a `maxres` thumbnail. Read the URL defensively
  // so a missing entry yields "no cover" instead of a TypeError;
  // `fetchAndStoreCover` already treats an undefined URL as a no-op.
  const thumbnailUrl: string | undefined = video.snippet.thumbnails?.maxres
    ?.url;
  const coverImagePath = await fetchAndStoreCover(
    thumbnailUrl,
    videoTitle,
    streamResponse,
  );

  // The headline keeps the full video title; only the file name uses the
  // shortened one.
  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: video.snippet.title,
    articleBody: video.snippet.description,
    image: coverImagePath,
    url: fetchUrl,
    datePublished: formatDate(video.snippet.publishedAt),
    author: {
      _type: "Person",
      name: video.snippet.channelTitle,
    },
  };

  streamResponse.info("creating article");

  const filename = toUrlSafeString(videoTitle);

  await createResource(
    `articles/${filename}.md`,
    newArticle,
  );

  streamResponse.info("finished");

  streamResponse.send({ type: "finished", url: filename });

  return newArticle;
}
|
|
|
|
/**
 * Route handlers for creating an article from a URL.
 *
 * `GET` requires `ctx.state.session` and a valid `url` query parameter. It
 * starts the import in the background (YouTube links and other pages use
 * different processors) and returns `streamResponse.response` right away,
 * without awaiting the import.
 *
 * Throws `AccessDeniedError` when there is no session and `BadRequestError`
 * when the `url` parameter is missing or fails `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    if (!ctx.state.session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Shared completion handling for both processors: log the outcome and
    // always cancel the stream once the job has settled.
    const track = (job: Promise<unknown>, successMessage: string) => {
      job
        .then((article) => {
          log.debug(successMessage, { article });
        })
        .catch((err) => {
          log.error(err);
        })
        .finally(() => {
          streamResponse.cancel();
        });
    };

    if (isYoutubeLink(fetchUrl)) {
      track(
        processCreateYoutubeVideo({ fetchUrl, streamResponse }),
        "created article from youtube",
      );
    } else {
      track(
        processCreateArticle({ fetchUrl, streamResponse }),
        "created article from link",
      );
    }

    return streamResponse.response;
  },
};
|