192 lines
5.2 KiB
TypeScript
192 lines
5.2 KiB
TypeScript
import { FreshContext, Handlers } from "$fresh/server.ts";
|
|
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
|
|
import { formatDate, safeFileName } from "@lib/string.ts";
|
|
import { createStreamResponse } from "@lib/helpers.ts";
|
|
import {
|
|
AccessDeniedError,
|
|
BadRequestError,
|
|
NotFoundError,
|
|
} from "@lib/errors.ts";
|
|
import { createResource, fetchResource } from "@lib/marka/index.ts";
|
|
import { ArticleResource } from "@lib/marka/schema.ts";
|
|
import { webScrape } from "@lib/webScraper.ts";
|
|
import * as openai from "@lib/openai.ts";
|
|
import * as unsplash from "@lib/unsplash.ts";
|
|
import { createLogger } from "@lib/log/index.ts";
|
|
|
|
/**
 * Works out a file extension for an image reference.
 *
 * When `str` parses as an absolute URL, an `fm` query parameter takes
 * precedence (presumably the image-format parameter of Unsplash-style URLs —
 * confirm); otherwise the extension is taken from the URL's path. If anything
 * in that process throws (e.g. `str` is not a valid URL), the extension is
 * derived from the raw string instead.
 *
 * @param str Absolute URL or plain path/file name.
 * @returns The `fm` value, or whatever `fileExtension` reports.
 */
function ext(str: string) {
  try {
    const parsed = new URL(str);
    const format = parsed.searchParams.get("fm");
    // `get` yields null only when the parameter is absent; an empty `fm=` is
    // still returned as-is.
    return format === null ? fileExtension(parsed.pathname) : format;
  } catch (_e) {
    return fileExtension(str);
  }
}
|
|
|
|
// Module-level logger shared by every function in this route; the string is
// the name handed to createLogger.
const log = createLogger("api/article/enhance");
|
|
|
|
/**
 * Tries to find a cover image on Unsplash for the given article text.
 *
 * A search term is requested from `openai.createUnsplashSearchTerm`, then
 * passed to `unsplash.getImageBySearchTerm`. Progress is reported on
 * `streamResponse`. This is best-effort: any thrown error is logged and
 * turned into `undefined` rather than propagated.
 *
 * @param content Article text used to derive the search term.
 * @param streamResponse Stream that receives progress messages.
 * @returns The value returned by the Unsplash lookup, or `undefined` when no
 *   search term was produced or an error occurred.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const term = await openai.createUnsplashSearchTerm(content);
    if (term) {
      streamResponse.info(`searching for ${term}`);
      return await unsplash.getImageBySearchTerm(term);
    }
    return undefined;
  } catch (e) {
    log.error("Failed to get unsplash cover image", e);
    return undefined;
  }
}
|
|
|
|
/**
 * Downloads a remote image and stores it as the cover of an article.
 *
 * The target path is `articles/images/<safeFileName(title)>_cover.<ext>`,
 * where the extension is derived from `imageUrl` via `ext`. Download and
 * storage failures are logged and reported as `undefined`; they never throw.
 *
 * @param imageUrl Remote image location; a falsy value short-circuits.
 * @param title Article title used to build the stored file name.
 * @param streamResponse Stream that receives progress messages.
 * @returns `resources/<imagePath>` on success, otherwise `undefined`.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return undefined;

  const imagePath = `articles/images/${safeFileName(title)}_cover.${
    ext(imageUrl)
  }`;

  try {
    streamResponse.info("downloading cover");
    const res = await fetch(imageUrl);
    if (!res.ok) {
      log.error(`Failed to download remote image: ${imageUrl}`, {
        status: res.status,
      });
      // The body is not needed; cancel it so the underlying stream/connection
      // is released instead of lingering unread.
      await res.body?.cancel();
      return undefined;
    }

    const buffer = await res.arrayBuffer();
    streamResponse.info("saving cover");
    await createResource(imagePath, buffer);
    return `resources/${imagePath}`;
  } catch (err) {
    // Reached for network errors as well as storage errors.
    log.error(`Failed to save image: ${imageUrl}`, err);
    return undefined;
  }
}
|
|
|
|
/**
 * Re-scrapes the source URL of a stored article and enriches the stored
 * content with the result.
 *
 * In order: loads `articles/<name>`, scrapes `content.url`, asks OpenAI for
 * metadata, overwrites `articleBody`, fills `datePublished`, `author` and
 * `image` only when they are missing, writes the content back and finally
 * sends a `finished` event. Progress is reported on `streamResponse`
 * throughout.
 *
 * @param name Resource name below `articles/` (presumably ending in `.md`).
 * @param streamResponse Stream used for progress messages and the final event.
 * @throws NotFoundError when the article resource does not exist.
 * @throws BadRequestError when the article has no `content.url`.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  const article = await fetchResource<ArticleResource>(`articles/${name}`);
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });
  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");

  log.debug("downloaded and parsed", result);

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  // Preference order: scraped title, AI headline, previously stored headline.
  const title = result.title || aiMeta?.headline ||
    article.content?.headline || "";

  // `content` must exist for `fetchUrl` to be set, so this mainly narrows the
  // type for the assignments below.
  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result.published || aiMeta?.datePublished || undefined,
  );

  // A missing or empty author name is (re)filled from the scraped schema.org
  // data or the AI metadata; only the first "@" is rewritten to "twitter:".
  if (!article.content.author?.name) {
    const authorName = result.schemaOrgData?.author?.name ||
      aiMeta?.author || "";
    article.content.author = {
      _type: "Person",
      name: authorName.replace("@", "twitter:"),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result.image?.length) {
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      // No image on the scraped page: fall back to an Unsplash search.
      const urlPath = await getUnsplashCoverImage(
        result.content,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  // Only the first 200 characters of the body are logged to keep output short.
  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);

  // Strip a trailing ".md". The previous pattern /$\.md/ put the end anchor
  // before the literal and therefore could never match.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}
|
|
|
|
/**
 * POST handler: starts enhancing the article named by the `name` route
 * parameter and immediately returns a streaming response.
 *
 * The enhancement runs in the background. Failures are logged and forwarded
 * to the client through the stream, and the stream is always cancelled once
 * processing settles.
 *
 * @param _req Incoming request (unused).
 * @param ctx Fresh context providing `state.session` and `params.name`.
 * @returns The response object of the created stream.
 * @throws AccessDeniedError when the context carries no session.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  const session = ctx.state.session;
  if (!session) {
    throw new AccessDeniedError();
  }

  const streamResponse = createStreamResponse();

  // Deliberately not awaited: the response must be returned while work is
  // still in progress.
  void processEnhanceArticle(ctx.params.name, streamResponse)
    .catch((err) => {
      log.error(err);
      // Anything can be thrown; reading `.message` off null/undefined would
      // throw inside this handler and surface as an unhandled rejection.
      const message = err instanceof Error ? err.message : String(err);
      streamResponse.error(message);
    })
    .finally(() => {
      streamResponse.cancel();
    });

  return streamResponse.response;
};
|
|
|
|
/** Route handlers exported for Fresh; this endpoint only registers POST. */
export const handler: Handlers = { POST: POST };
|