feat: add ability to scrape youtube video

This commit is contained in:
max_richter 2023-08-02 15:56:33 +02:00
parent cebbb8af2b
commit ba853342bd
10 changed files with 193 additions and 36 deletions

View File

@ -1,30 +1,10 @@
/**
 * Reports whether `link` is an absolute URL whose host belongs to YouTube.
 *
 * Accepts `youtube.com`, `youtu.be` and any of their subdomains
 * (`www.`, `m.`, `music.`, ...). The comparison is done on whole domain
 * labels, so look-alikes such as `notyoutube.com` or
 * `youtube.com.example.org` are rejected.
 *
 * @param link - Candidate URL string; may be empty or malformed.
 * @returns `true` for a YouTube host, `false` otherwise (including when
 *   `link` cannot be parsed as a URL).
 */
export const isYoutubeLink = (link: string) => {
  let hostname: string;
  try {
    hostname = new URL(link).hostname;
  } catch {
    // Unparsable input is an expected case for a predicate, not an error
    // worth logging.
    return false;
  }
  return ["youtube.com", "youtu.be"].some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
  );
};
/**
 * Extracts the video id from a YouTube URL.
 *
 * Supported shapes:
 *  - `.../watch?v=ID`           (query parameter, must be longer than 4 chars)
 *  - `.../embed/ID`, `.../shorts/ID`, `.../live/ID`, `.../v/ID`
 *  - `youtu.be/ID`              (whole path is the id)
 *
 * @param link - Absolute URL of the video.
 * @returns The video id; for unrecognised shapes, the path without its
 *   leading and trailing slashes.
 * @throws TypeError when `link` is not a parseable absolute URL.
 */
function extractYoutubeId(link: string): string {
  const url = new URL(link);

  const fromQuery = url.searchParams.get("v");
  if (fromQuery && fromQuery.length > 4) {
    return fromQuery;
  }

  // Path-based links carry the id in the segment after a fixed prefix.
  const [prefix, candidate] = url.pathname.split("/").filter(Boolean);
  if (
    prefix !== undefined && candidate !== undefined &&
    ["embed", "shorts", "live", "v"].includes(prefix)
  ) {
    return candidate;
  }

  // Short links (youtu.be/ID): the path itself is the id.
  return url.pathname.replace(/^\/+|\/+$/g, "");
}
import { extractYoutubeId } from "@lib/string.ts";
export const YoutubePlayer = ({ link }: { link: string }) => {
const id = extractYoutubeId(link);
return (
<iframe
class="mb-6"
width="100%"
height="400px"
src={`https://www.youtube-nocookie.com/embed/${id}`}

4
lib/cache/image.ts vendored
View File

@ -69,8 +69,8 @@ export async function setImage(
const pointerId = await hash(cacheKey);
await cache.set(pointerId, clone);
cache.expire(pointerId, 60 * 10);
cache.expire(cacheKey, 60 * 10);
cache.expire(pointerId, 60 * 60 * 24);
cache.expire(cacheKey, 60 * 60 * 24);
await cache.set(
cacheKey,

View File

@ -3,3 +3,4 @@ export const REDIS_HOST = Deno.env.get("REDIS_HOST");
// Each constant below is read once, at module load, from the process
// environment via Deno.env.get, so it is `undefined` when the variable is
// not set. Consumers must handle that case themselves.
export const REDIS_PASS = Deno.env.get("REDIS_PASS");
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
// Sent as the `key` query parameter by lib/youtube.ts.
export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");

View File

@ -55,8 +55,12 @@ export async function extractAuthorName(content: string) {
const author = chatCompletion.choices[0].message.content;
if (author !== "not found") return author;
return "";
if (
author?.toLowerCase().includes("not") &&
author?.toLowerCase().includes("found")
) return "";
return author;
}
export async function createTags(content: string) {

View File

@ -30,3 +30,26 @@ export function extractHashTags(inputString: string) {
return hashtags;
}
/**
 * Reports whether `link` is an absolute URL whose host belongs to YouTube.
 *
 * Accepts `youtube.com`, `youtu.be` and any of their subdomains
 * (`www.`, `m.`, `music.`, ...). The comparison is done on whole domain
 * labels, so look-alikes such as `notyoutube.com` or
 * `youtube.com.example.org` are rejected.
 *
 * @param link - Candidate URL string; may be empty or malformed.
 * @returns `true` for a YouTube host, `false` otherwise (including when
 *   `link` cannot be parsed as a URL).
 */
export const isYoutubeLink = (link: string) => {
  let hostname: string;
  try {
    hostname = new URL(link).hostname;
  } catch {
    // Unparsable input is an expected case for a predicate, not an error
    // worth logging.
    return false;
  }
  return ["youtube.com", "youtu.be"].some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
  );
};
/**
 * Extracts the video id from a YouTube URL.
 *
 * Supported shapes:
 *  - `.../watch?v=ID`           (query parameter, must be longer than 4 chars)
 *  - `.../embed/ID`, `.../shorts/ID`, `.../live/ID`, `.../v/ID`
 *  - `youtu.be/ID`              (whole path is the id)
 *
 * @param link - Absolute URL of the video.
 * @returns The video id; for unrecognised shapes, the path without its
 *   leading and trailing slashes.
 * @throws TypeError when `link` is not a parseable absolute URL.
 */
export function extractYoutubeId(link: string): string {
  const url = new URL(link);

  const fromQuery = url.searchParams.get("v");
  if (fromQuery && fromQuery.length > 4) {
    return fromQuery;
  }

  // Path-based links carry the id in the segment after a fixed prefix.
  const [prefix, candidate] = url.pathname.split("/").filter(Boolean);
  if (
    prefix !== undefined && candidate !== undefined &&
    ["embed", "shorts", "live", "v"].includes(prefix)
  ) {
    return candidate;
  }

  // Short links (youtu.be/ID): the path itself is the id.
  return url.pathname.replace(/^\/+|\/+$/g, "");
}

86
lib/youtube.ts Normal file
View File

@ -0,0 +1,86 @@
import { YOUTUBE_API_KEY } from "@lib/env.ts";
const BASE_URL = "https://youtube.googleapis.com/youtube/v3/";
/**
 * Top-level JSON body expected back from the `videos` endpoint.
 * `getYoutubeVideoDetails` only reads `items[0]` from it.
 * NOTE(review): field list presumably mirrors the YouTube Data API v3
 * `videos.list` response — confirm against the API reference.
 */
export interface APIResponse {
kind: string;
etag: string;
items: Item[];
pageInfo: PageInfo;
}
/**
 * A single entry of `APIResponse.items`; this is what
 * `getYoutubeVideoDetails` hands back to its caller.
 * `snippet`, `contentDetails` and `statistics` match the three `part`
 * values that function puts in its request URL.
 */
export interface Item {
kind: string;
etag: string;
id: string;
snippet: Snippet;
contentDetails: ContentDetails;
statistics: Statistics;
}
/**
 * Descriptive metadata of a video (title, description, channel, tags, ...).
 */
export interface Snippet {
// Timestamp as a string; presumably ISO 8601 — confirm against the API docs.
publishedAt: string;
channelId: string;
title: string;
description: string;
thumbnails: Thumbnails;
channelTitle: string;
// NOTE(review): declared as always present; verify that the API never
// omits this field (e.g. for videos without tags) before relying on it.
tags: string[];
categoryId: string;
liveBroadcastContent: string;
localized: Localized;
}
/**
 * Thumbnail images of a video, keyed by size name.
 * NOTE(review): all five sizes are declared required; confirm that the API
 * always returns the larger ones (`standard`, `maxres`).
 */
export interface Thumbnails {
default: Resolution;
medium: Resolution;
high: Resolution;
standard: Resolution;
maxres: Resolution;
}
/**
 * One thumbnail image: where to load it from and its dimensions
 * (presumably pixels — confirm).
 */
export interface Resolution {
url: string;
width: number;
height: number;
}
/**
 * Title and description as found under `Snippet.localized`.
 * NOTE(review): which language these are in is not visible here — confirm.
 */
export interface Localized {
title: string;
description: string;
}
/**
 * Technical properties of the video file (length, definition, captions, ...).
 */
export interface ContentDetails {
// Kept as a string; presumably an ISO 8601 duration such as "PT4M13S" —
// confirm before parsing.
duration: string;
dimension: string;
definition: string;
// A string, not a boolean.
caption: string;
licensedContent: boolean;
contentRating: ContentRating;
projection: string;
}
// Placeholder for `ContentDetails.contentRating`: no fields are declared yet,
// so nothing can be read from it in a type-safe way.
export interface ContentRating {}
/**
 * Engagement counters of a video. Every counter is declared as a string,
 * not a number, so callers have to convert before doing arithmetic.
 * NOTE(review): all counters are declared required; confirm the API always
 * includes them (e.g. when likes or comments are hidden).
 */
export interface Statistics {
viewCount: string;
likeCount: string;
favoriteCount: string;
commentCount: string;
}
/**
 * Paging information attached to an `APIResponse`.
 */
export interface PageInfo {
totalResults: number;
resultsPerPage: number;
}
/**
 * Loads snippet, contentDetails and statistics for one video from the
 * `videos` endpoint under `BASE_URL`.
 *
 * @param id - Id of the video to look up.
 * @returns The first item of the API response.
 * @throws Error when `YOUTUBE_API_KEY` is not set, when the API answers with
 *   a non-2xx status, or when the response contains no item for `id`.
 */
export async function getYoutubeVideoDetails(
  id: string,
): Promise<Item> {
  if (!YOUTUBE_API_KEY) {
    // Without this check the literal string "undefined" would be sent as key.
    throw new Error("YOUTUBE_API_KEY is not set");
  }

  // URLSearchParams percent-encodes every value, so an unexpected character
  // in `id` cannot inject additional query parameters.
  const query = new URLSearchParams({
    part: "snippet,contentDetails,statistics",
    id,
    key: YOUTUBE_API_KEY,
  });

  const response = await fetch(`${BASE_URL}videos?${query}`);
  if (!response.ok) {
    // The URL is deliberately left out of the message: it contains the key.
    throw new Error(
      `YouTube API request failed: ${response.status} ${response.statusText}`,
    );
  }

  const json: Partial<APIResponse> | null = await response.json();
  const video = json?.items?.[0];
  if (!video) {
    throw new Error(`YouTube API returned no video for id "${id}"`);
  }
  return video;
}

View File

@ -7,7 +7,9 @@ import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
import { createArticle } from "@lib/resource/articles.ts";
import { Article, createArticle } from "@lib/resource/articles.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";
const parser = new DOMParser();
@ -124,6 +126,47 @@ async function processCreateArticle(
streamResponse.enqueue("id: " + newArticle.id);
}
/**
 * Creates an article from a YouTube video and reports progress on the
 * response stream.
 *
 * Steps: extract the video id from `fetchUrl`, load the video details,
 * call `openai.shortenTitle` on the video title to obtain the article id,
 * then store the article via `createArticle`.
 *
 * @param fetchUrl - URL of the YouTube video.
 * @param streamResponse - Stream on which progress messages are enqueued;
 *   the last message is `"id: <article id>"`.
 * @throws Error when no video details could be loaded for the extracted id;
 *   errors thrown by the helpers it calls propagate unchanged.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const id = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(id);
  // Fail with a readable message instead of a TypeError on `video.snippet`
  // when the lookup produced nothing (unknown, private or deleted video).
  if (!video?.snippet) {
    throw new Error(`no youtube video found for id "${id}"`);
  }
  const { snippet } = video;

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(snippet.title);

  const newArticle: Article = {
    name: snippet.title,
    id: newId || snippet.title,
    content: snippet.description,
    // The API response may omit `tags` (videos without tags); default to
    // none rather than crashing on `.slice`.
    tags: (snippet.tags ?? []).slice(0, 5),
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: snippet.channelTitle,
      date: new Date(snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");

  await createArticle(newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + newArticle.id);
}
export const handler: Handlers = {
GET(req) {
const url = new URL(req.url);
@ -135,13 +178,25 @@ export const handler: Handlers = {
const streamResponse = createStreamResponse();
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
console.log({ article });
}).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
if (isYoutubeLink(fetchUrl)) {
processCreateYoutubeVideo({ fetchUrl, streamResponse }).then(
(article) => {
console.log({ article });
},
).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
} else {
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
console.log({ article });
}).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
}
return streamResponse.response;
},

View File

@ -3,8 +3,9 @@ import { MainLayout } from "@components/layouts/main.tsx";
import { Article, getArticle } from "@lib/resource/articles.ts";
import { RecipeHero } from "@components/RecipeHero.tsx";
import { KMenu } from "@islands/KMenu.tsx";
import { isYoutubeLink, YoutubePlayer } from "@components/Youtube.tsx";
import { YoutubePlayer } from "@components/Youtube.tsx";
import { HashTags } from "@components/HashTags.tsx";
import { isYoutubeLink } from "@lib/string.ts";
export const handler: Handlers<Article | null> = {
async GET(_, ctx) {

View File

@ -31,7 +31,13 @@ export default function Greet(props: PageProps<Article[] | null>) {
<KMenu type="main" context={false} />
<Grid>
{props.data?.map((doc) => {
return <Card link={`/articles/${doc.id}`} title={doc.name} />;
return (
<Card
image={"/placeholder.svg"}
link={`/articles/${doc.id}`}
title={doc.name}
/>
);
})}
</Grid>
</MainLayout>

View File

@ -22,6 +22,7 @@ export default function Greet(props: PageProps<Movie>) {
<RecipeHero
data={movie}
subline={[author, date.toString()]}
editLink={`https://notes.max-richter.dev/Media/movies/${movie.id}`}
backlink="/movies"
/>
<KMenu type="main" context={movie} />