Files

192 lines
5.2 KiB
TypeScript

import { FreshContext, Handlers } from "$fresh/server.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
import { formatDate, safeFileName } from "@lib/string.ts";
import { createStreamResponse } from "@lib/helpers.ts";
import {
AccessDeniedError,
BadRequestError,
NotFoundError,
} from "@lib/errors.ts";
import { createResource, fetchResource } from "@lib/marka/index.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { webScrape } from "@lib/webScraper.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { createLogger } from "@lib/log/index.ts";
/**
 * Picks the file extension to use for a URL or path-like string.
 *
 * If `str` parses as a URL that carries an `fm` query parameter, that
 * parameter's value is returned unchanged. Otherwise the extension is derived
 * from the URL's pathname via `fileExtension`. If anything in that process
 * throws (e.g. `str` is not a valid URL), the extension is derived from the
 * raw string instead.
 *
 * @param str URL or file name to inspect.
 * @returns The `fm` value, or whatever `fileExtension` reports.
 */
function ext(str: string) {
  try {
    const { searchParams, pathname } = new URL(str);
    const format = searchParams.get("fm");
    // `get` yields null only when the parameter is absent, so an explicitly
    // empty `fm=` is still returned as-is.
    return format !== null ? format : fileExtension(pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
// Logger shared by every function in this module, created under the name
// "api/article/enhance".
const log = createLogger("api/article/enhance");
/**
 * Best-effort lookup of a cover image URL on Unsplash.
 *
 * Obtains a search term from `openai.createUnsplashSearchTerm(content)` and,
 * when one is produced, passes it to `unsplash.getImageBySearchTerm`.
 * Progress is reported through `streamResponse.info`.
 *
 * @param content Text handed to the search-term generator.
 * @param streamResponse Stream used to report progress messages.
 * @returns The value resolved by the Unsplash lookup, or `undefined` when no
 *   search term was produced or any step threw (the error is logged, never
 *   rethrown).
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const term = await openai.createUnsplashSearchTerm(content);
    if (term) {
      streamResponse.info(`searching for ${term}`);
      return await unsplash.getImageBySearchTerm(term);
    }
  } catch (e) {
    // A missing cover is not fatal for the caller: log and fall through.
    log.error("Failed to get unsplash cover image", e);
  }
  return undefined;
}
/**
 * Downloads a remote image and stores it as the cover for an article.
 *
 * The target path is `articles/images/<safeFileName(title)>_cover.<ext>`,
 * where the extension is taken from `ext(imageUrl)`. Failures are logged and
 * reported as `undefined`; nothing is thrown to the caller.
 *
 * @param imageUrl Remote image URL; when falsy the function returns at once.
 * @param title Article title used to build the stored file name.
 * @param streamResponse Stream used to report progress messages.
 * @returns `resources/<imagePath>` once the image has been stored, otherwise
 *   `undefined`.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return;
  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;
  try {
    streamResponse.info("downloading cover");
    const res = await fetch(imageUrl);
    if (!res.ok) {
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      // The body is never read on this path; cancel it so the underlying
      // connection/resource is released instead of lingering until GC.
      await res.body?.cancel();
      return;
    }
    const buffer = await res.arrayBuffer();
    streamResponse.info("saving cover");
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    log.error(`Failed to save image: ${imageUrl}`, err);
    return;
  }
}
/**
 * Re-scrapes the source URL of a stored article and fills in missing fields.
 *
 * Steps, each announced through `streamResponse.info`:
 *  1. Load `articles/<name>` and read its `content.url`.
 *  2. Scrape that URL with `webScrape` and extract metadata with
 *     `openai.extractArticleMetadata`.
 *  3. Overwrite `articleBody` with the scraped markdown; set `datePublished`,
 *     `author` and `image` only when they are not already present.
 *  4. Write the updated content back to `articles/<name>` and send a
 *     `finished` event whose `url` is `name` without a trailing `.md`.
 *
 * @param name Resource name of the article below `articles/`.
 * @param streamResponse Stream used for progress messages and the final event.
 * @throws NotFoundError when the article resource does not exist.
 * @throws BadRequestError when the article has no URL to scrape.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  const article = await fetchResource<ArticleResource>(
    `articles/${name}`,
  );
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });
  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");
  log.debug("downloaded and parsed", result);

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  // Title precedence: scraped page, then AI metadata, then the stored headline.
  const title = result?.title || aiMeta?.headline ||
    article.content?.headline || "";

  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result?.published || aiMeta?.datePublished || undefined,
  );

  // An empty string is falsy, so this also covers `name === ""`.
  if (!article.content.author?.name) {
    article.content.author = {
      _type: "Person",
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result?.image?.length) {
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      // No image on the scraped page: fall back to an Unsplash search.
      const urlPath = await getUnsplashCoverImage(
        result.content,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  // Log only the first 200 characters of the body to keep the entry small.
  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);

  // `$` must come after `.md` to anchor at the end of the name; the previous
  // pattern `/$\.md/` could never match, so the suffix was never removed.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}
/**
 * POST handler: starts enhancing the article named by `ctx.params.name` and
 * immediately returns the streaming response that reports its progress.
 *
 * The enhancement runs in the background; a failure is logged and forwarded
 * to the stream via `streamResponse.error`, and the stream is cancelled once
 * the job settles either way.
 *
 * @throws AccessDeniedError when `ctx.state.session` is not set.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  if (!ctx.state.session) {
    throw new AccessDeniedError();
  }

  const streamResponse = createStreamResponse();
  const job = processEnhanceArticle(ctx.params.name, streamResponse);

  // Intentionally not awaited: the response must be returned while the job
  // is still streaming updates into it.
  job
    .catch((err) => {
      log.error(err);
      streamResponse.error(err.message);
    })
    .finally(() => {
      streamResponse.cancel();
    });

  return streamResponse.response;
};
/** Handler map exported for this route; only the POST method is registered. */
export const handler: Handlers = {
POST,
};