feat: add ability to scrape YouTube videos

This commit is contained in:
2023-08-02 15:56:33 +02:00
parent cebbb8af2b
commit ba853342bd
10 changed files with 193 additions and 36 deletions

View File

@ -7,7 +7,9 @@ import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
import { createArticle } from "@lib/resource/articles.ts";
import { Article, createArticle } from "@lib/resource/articles.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";
const parser = new DOMParser();
@ -124,6 +126,47 @@ async function processCreateArticle(
streamResponse.enqueue("id: " + newArticle.id);
}
/**
 * Creates an article from a YouTube video URL and reports progress on the
 * supplied stream.
 *
 * Steps, in order: extract the video id from `fetchUrl`, fetch the video
 * details, ask OpenAI for a shortened title to use as the article id, persist
 * the article via `createArticle`, then enqueue "finished" and the new id.
 *
 * @param fetchUrl - URL of the YouTube video; also stored as the article's
 *   `meta.link`.
 * @param streamResponse - Stream that receives human-readable progress
 *   messages via `enqueue`.
 *
 * Errors from the awaited calls are not caught here; they propagate to the
 * caller.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");
  const id = extractYoutubeId(fetchUrl);
  const video = await getYoutubeVideoDetails(id);

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(video.snippet.title);

  const newArticle: Article = {
    name: video.snippet.title,
    // `||` on purpose: an empty shortened title must also fall back to the
    // full title.
    id: newId || video.snippet.title,
    content: video.snippet.description,
    // The API response carries no `tags` property for videos without tags,
    // so guard the access instead of letting `.slice` throw on `undefined`.
    tags: video.snippet.tags?.slice(0, 5) ?? [],
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: video.snippet.channelTitle,
      date: new Date(video.snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");
  await createArticle(newArticle);

  streamResponse.enqueue("finished");
  streamResponse.enqueue("id: " + newArticle.id);
}
export const handler: Handlers = {
GET(req) {
const url = new URL(req.url);
@ -135,13 +178,25 @@ export const handler: Handlers = {
const streamResponse = createStreamResponse();
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
console.log({ article });
}).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
if (isYoutubeLink(fetchUrl)) {
processCreateYoutubeVideo({ fetchUrl, streamResponse }).then(
(article) => {
console.log({ article });
},
).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
} else {
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
console.log({ article });
}).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
}
return streamResponse.response;
},