155 lines
4.1 KiB
TypeScript
155 lines
4.1 KiB
TypeScript
import { Handlers } from "$fresh/server.ts";
|
|
import { Defuddle } from "defuddle/node";
|
|
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
|
|
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
|
|
import * as openai from "@lib/openai.ts";
|
|
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
|
import {
|
|
extractYoutubeId,
|
|
isYoutubeLink,
|
|
toUrlSafeString,
|
|
} from "@lib/string.ts";
|
|
import { createLogger } from "@lib/log/index.ts";
|
|
import { createResource } from "@lib/marka/index.ts";
|
|
import { webScrape } from "@lib/webScraper.ts";
|
|
import { ArticleResource } from "@lib/marka/schema.ts";
|
|
|
|
// Module-level logger shared by every function in this file; it is created
// with the name "api/article".
const log = createLogger("api/article");
|
|
|
|
/**
 * Creates an article resource from an arbitrary web page.
 *
 * The page is fetched with `webScrape`, converted to markdown with `Defuddle`,
 * enriched with metadata from `openai.extractArticleMetadata`, and finally
 * stored through `createResource` at `articles/<id>.md`. A progress message is
 * pushed to `streamResponse` before each step, and the last message is
 * `"id: <id>"`.
 *
 * @param options.fetchUrl URL of the page to import.
 * @param options.streamResponse Stream used to report progress to the client.
 * @returns The article content that was handed to `createResource`.
 * @throws Error when no id can be derived because neither the parsed page nor
 *   the AI metadata yields a usable title.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");

  const doc = await webScrape(fetchUrl, streamResponse);

  const result = await Defuddle(doc, fetchUrl, {
    markdown: true,
  });

  log.debug("downloaded and parsed article", {
    url: fetchUrl,
    content: result.content,
  });

  streamResponse.enqueue("parsed article, creating tags with openai");

  const aiMeta = await openai.extractArticleMetadata(result.content);

  streamResponse.enqueue("postprocessing article");

  // `||` (not `??`) on purpose: an empty string from the parser should fall
  // through to the next candidate.
  const title = result.title || aiMeta?.headline || "";
  const id = toUrlSafeString(title);

  // Without an id the resource would be written to "articles/.md"; abort
  // before anything is written instead.
  if (!id) {
    throw new Error(`could not derive an article id for ${fetchUrl}`);
  }

  const authorName: string = result.schemaOrgData?.author?.name ||
    aiMeta?.author || "";

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: title,
    articleBody: result.content,
    url: fetchUrl,
    // Fall back to "now" when neither source provides a publication date.
    datePublished: result.published || aiMeta?.datePublished ||
      new Date().toISOString(),
    image: result.image,
    author: {
      _type: "Person",
      // Only the first "@" is rewritten.
      // NOTE(review): presumably this targets a leading twitter handle — confirm.
      name: authorName.replace("@", "twitter:"),
    },
  };

  streamResponse.enqueue("writing to disk");

  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("id: " + id);

  // Returned so callers can log or inspect what was created.
  return newArticle;
}
|
|
|
|
/**
 * Creates an article resource from a YouTube link.
 *
 * The video id is extracted from the URL, the video details are loaded with
 * `getYoutubeVideoDetails`, and the video title is shortened with
 * `openai.shortenTitle` to obtain the resource id (falling back to the YouTube
 * id when that yields nothing). The resource is stored through
 * `createResource` at `articles/<id>.md`. Progress messages are pushed to
 * `streamResponse`; the last two are `"finished"` and `"id: <id>"`.
 *
 * @param options.fetchUrl URL of the YouTube video to import.
 * @param options.streamResponse Stream used to report progress to the client.
 * @returns The article content that was handed to `createResource`.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<ArticleResource["content"]> {
  log.info("create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const youtubeId = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(youtubeId);
  const { snippet } = video;

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(snippet.title);

  // NOTE(review): `newId` ends up in a file path; this assumes
  // `openai.shortenTitle` already returns a path-safe string — confirm.
  const id = newId || youtubeId;

  const newArticle: ArticleResource["content"] = {
    _type: "Article",
    headline: snippet.title,
    articleBody: snippet.description,
    url: fetchUrl,
    // Normalises the API timestamp to ISO 8601; `toISOString` throws a
    // RangeError if `publishedAt` is not a parseable date.
    datePublished: new Date(snippet.publishedAt).toISOString(),
    author: {
      _type: "Person",
      name: snippet.channelTitle,
    },
  };

  streamResponse.enqueue("creating article");

  await createResource(`articles/${id}.md`, newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + id);

  // Returned so callers can log or inspect what was created.
  return newArticle;
}
|
|
|
|
/**
 * Route handlers for creating an article from a URL.
 *
 * `GET ?url=<link>` requires a session on `ctx.state` and a valid `url` query
 * parameter. The streaming response is returned immediately; the import itself
 * runs in the background and reports progress through the same stream.
 *
 * Throws `AccessDeniedError` when there is no session and `BadRequestError`
 * when the `url` parameter is missing or fails `isValidUrl`.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    const { session } = ctx.state;
    if (!session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Choose the import routine and its success log label once, so the
    // completion handling below is written a single time.
    const fromYoutube = isYoutubeLink(fetchUrl);
    const createFromUrl = fromYoutube
      ? processCreateYoutubeVideo
      : processCreateArticle;
    const successLabel = fromYoutube
      ? "created article from youtube"
      : "created article from link";

    const createAndLog = async (): Promise<void> => {
      const article = await createFromUrl({ fetchUrl, streamResponse });
      log.debug(successLabel, { article });
    };

    // Deliberately not awaited: the response must be returned while the
    // import is still running. Failures are logged only, and the stream is
    // always cancelled once the work has settled.
    createAndLog()
      .catch((err) => {
        log.error(err);
      })
      .finally(() => {
        streamResponse.cancel();
      });

    return streamResponse.response;
  },
};
|