feat: add ability to scrape youtube video
This commit is contained in:
parent
cebbb8af2b
commit
ba853342bd
@ -1,30 +1,10 @@
|
||||
/**
 * Tells whether `link` is an absolute URL hosted on YouTube.
 *
 * Accepts `youtu.be`, `youtube.com` and any subdomain of either (for example
 * `www.youtube.com`, `m.youtube.com`, `music.youtube.com`). Matching on the
 * domain suffix, rather than an exact host list, keeps links copied from the
 * regular, mobile and music sites working.
 *
 * @param link - Text expected to contain an absolute URL.
 * @returns `true` for a YouTube host; `false` for any other host or when
 * `link` cannot be parsed as a URL (the parse error is logged).
 */
export const isYoutubeLink = (link: string): boolean => {
  try {
    const { hostname } = new URL(link);
    // The leading dot in the suffix check prevents "notyoutube.com" from matching.
    return ["youtu.be", "youtube.com"].some((domain) =>
      hostname === domain || hostname.endsWith(`.${domain}`)
    );
  } catch (err) {
    console.log(err);
    return false;
  }
};
|
||||
|
||||
/**
 * Pulls the video id out of a YouTube URL.
 *
 * Lookup order:
 * 1. the `v` query parameter (`/watch?v=<id>`), when longer than 4 characters;
 * 2. the segment after `/embed/`, `/shorts/`, `/live/` or `/v/`;
 * 3. the URL path without its leading/trailing slash (`youtu.be/<id>`).
 *
 * @param link - Absolute URL of the video.
 * @returns The extracted id; for an unrecognised URL this is simply its path.
 * @throws TypeError when `link` is not a valid absolute URL.
 */
function extractYoutubeId(link: string): string {
  const url = new URL(link);

  const queryId = url.searchParams.get("v");
  if (queryId && queryId.length > 4) {
    return queryId;
  }

  // Player/shorts/live links keep the id in the second path segment; returning
  // the whole path would yield "embed/<id>" and break embeds and API lookups.
  const [kind, pathId] = url.pathname.split("/").filter(Boolean);
  if (
    kind !== undefined && pathId !== undefined &&
    ["embed", "shorts", "live", "v"].includes(kind)
  ) {
    return pathId;
  }

  return url.pathname.replace(/^\/|\/$/g, "");
}
|
||||
import { extractYoutubeId } from "@lib/string.ts";
|
||||
|
||||
export const YoutubePlayer = ({ link }: { link: string }) => {
|
||||
const id = extractYoutubeId(link);
|
||||
return (
|
||||
<iframe
|
||||
class="mb-6"
|
||||
width="100%"
|
||||
height="400px"
|
||||
src={`https://www.youtube-nocookie.com/embed/${id}`}
|
||||
|
4
lib/cache/image.ts
vendored
4
lib/cache/image.ts
vendored
@ -69,8 +69,8 @@ export async function setImage(
|
||||
const pointerId = await hash(cacheKey);
|
||||
|
||||
await cache.set(pointerId, clone);
|
||||
cache.expire(pointerId, 60 * 10);
|
||||
cache.expire(cacheKey, 60 * 10);
|
||||
cache.expire(pointerId, 60 * 60 * 24);
|
||||
cache.expire(cacheKey, 60 * 60 * 24);
|
||||
|
||||
await cache.set(
|
||||
cacheKey,
|
||||
|
@ -3,3 +3,4 @@ export const REDIS_HOST = Deno.env.get("REDIS_HOST");
|
||||
export const REDIS_PASS = Deno.env.get("REDIS_PASS");
|
||||
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
|
||||
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
|
||||
export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
||||
|
@ -55,8 +55,12 @@ export async function extractAuthorName(content: string) {
|
||||
|
||||
const author = chatCompletion.choices[0].message.content;
|
||||
|
||||
if (author !== "not found") return author;
|
||||
return "";
|
||||
if (
|
||||
author?.toLowerCase().includes("not") &&
|
||||
author?.toLowerCase().includes("found")
|
||||
) return "";
|
||||
|
||||
return author;
|
||||
}
|
||||
|
||||
export async function createTags(content: string) {
|
||||
|
@ -30,3 +30,26 @@ export function extractHashTags(inputString: string) {
|
||||
|
||||
return hashtags;
|
||||
}
|
||||
|
||||
/**
 * Tells whether `link` is an absolute URL hosted on YouTube.
 *
 * Accepts `youtu.be`, `youtube.com` and any subdomain of either (for example
 * `www.youtube.com`, `m.youtube.com`, `music.youtube.com`). Matching on the
 * domain suffix, rather than an exact host list, means links copied from the
 * mobile or music sites are recognised too.
 *
 * @param link - Text expected to contain an absolute URL.
 * @returns `true` for a YouTube host; `false` for any other host or when
 * `link` cannot be parsed as a URL (the parse error is logged).
 */
export const isYoutubeLink = (link: string): boolean => {
  try {
    const { hostname } = new URL(link);
    // The leading dot in the suffix check prevents "notyoutube.com" from matching.
    return ["youtu.be", "youtube.com"].some((domain) =>
      hostname === domain || hostname.endsWith(`.${domain}`)
    );
  } catch (err) {
    console.log(err);
    return false;
  }
};
|
||||
|
||||
/**
 * Pulls the video id out of a YouTube URL.
 *
 * Lookup order:
 * 1. the `v` query parameter (`/watch?v=<id>`), when longer than 4 characters;
 * 2. the segment after `/embed/`, `/shorts/`, `/live/` or `/v/`;
 * 3. the URL path without its leading/trailing slash (`youtu.be/<id>`).
 *
 * @param link - Absolute URL of the video.
 * @returns The extracted id; for an unrecognised URL this is simply its path.
 * @throws TypeError when `link` is not a valid absolute URL.
 */
export function extractYoutubeId(link: string): string {
  const url = new URL(link);

  const queryId = url.searchParams.get("v");
  if (queryId && queryId.length > 4) {
    return queryId;
  }

  // Player/shorts/live links keep the id in the second path segment; returning
  // the whole path would yield "embed/<id>" and break embeds and API lookups.
  const [kind, pathId] = url.pathname.split("/").filter(Boolean);
  if (
    kind !== undefined && pathId !== undefined &&
    ["embed", "shorts", "live", "v"].includes(kind)
  ) {
    return pathId;
  }

  return url.pathname.replace(/^\/|\/$/g, "");
}
|
||||
|
86
lib/youtube.ts
Normal file
86
lib/youtube.ts
Normal file
@ -0,0 +1,86 @@
|
||||
import { YOUTUBE_API_KEY } from "@lib/env.ts";
|
||||
|
||||
const BASE_URL = "https://youtube.googleapis.com/youtube/v3/";
|
||||
|
||||
/**
 * Shape assumed for the JSON body returned by the YouTube Data API v3
 * `videos` endpoint queried in this module.
 */
export interface APIResponse {
  kind: string;
  etag: string;
  /** Video resources contained in the response. */
  items: Item[];
  pageInfo: PageInfo;
}
|
||||
|
||||
/**
 * One video resource: its id plus the `snippet`, `contentDetails` and
 * `statistics` parts.
 */
export interface Item {
  kind: string;
  etag: string;
  id: string;
  snippet: Snippet;
  contentDetails: ContentDetails;
  statistics: Statistics;
}
|
||||
|
||||
/**
 * Descriptive metadata of a video: title, description, channel, tags and
 * thumbnails.
 */
export interface Snippet {
  /** Publication timestamp as a string. */
  publishedAt: string;
  channelId: string;
  title: string;
  description: string;
  thumbnails: Thumbnails;
  channelTitle: string;
  // NOTE(review): the API may omit `tags` for untagged videos — confirm before relying on it.
  tags: string[];
  categoryId: string;
  liveBroadcastContent: string;
  localized: Localized;
}
|
||||
|
||||
/** Thumbnail images of a video, keyed by size name. */
export interface Thumbnails {
  default: Resolution;
  medium: Resolution;
  high: Resolution;
  standard: Resolution;
  maxres: Resolution;
}
|
||||
|
||||
/** A single thumbnail image: its URL and numeric dimensions. */
export interface Resolution {
  url: string;
  width: number;
  height: number;
}
|
||||
|
||||
/** Title and description pair stored under `Snippet.localized`. */
export interface Localized {
  title: string;
  description: string;
}
|
||||
|
||||
/** Technical properties of a video (the `contentDetails` part of an `Item`). */
export interface ContentDetails {
  duration: string;
  dimension: string;
  definition: string;
  caption: string;
  licensedContent: boolean;
  contentRating: ContentRating;
  projection: string;
}
|
||||
|
||||
/** Type of `ContentDetails.contentRating`; no fields are declared for it here. */
export interface ContentRating {}
|
||||
|
||||
/**
 * Counters of a video (the `statistics` part of an `Item`).
 * All counts are typed as strings, not numbers.
 */
export interface Statistics {
  viewCount: string;
  likeCount: string;
  favoriteCount: string;
  commentCount: string;
}
|
||||
|
||||
/** Paging summary stored under `APIResponse.pageInfo`. */
export interface PageInfo {
  totalResults: number;
  resultsPerPage: number;
}
|
||||
|
||||
/**
 * Fetches the `snippet`, `contentDetails` and `statistics` parts of a single
 * video from the YouTube Data API v3 `videos` endpoint.
 *
 * @param id - Id of the video to look up.
 * @returns The first item of the API response.
 * @throws Error when `YOUTUBE_API_KEY` is not configured, when the API answers
 * with a non-2xx status, or when the response contains no video for `id`.
 */
export async function getYoutubeVideoDetails(
  id: string,
): Promise<Item> {
  if (!YOUTUBE_API_KEY) {
    throw new Error("YOUTUBE_API_KEY is not set");
  }

  // URLSearchParams percent-encodes every value, so an id taken from a
  // user-supplied link cannot inject extra query parameters.
  const query = new URLSearchParams({
    part: "snippet,contentDetails,statistics",
    id,
    key: YOUTUBE_API_KEY,
  });

  const response = await fetch(`${BASE_URL}videos?${query.toString()}`);
  if (!response.ok) {
    // The request URL is deliberately left out of the message: it holds the API key.
    throw new Error(
      `YouTube API request failed with status ${response.status} ${response.statusText}`,
    );
  }

  const json: Partial<APIResponse> | null = await response.json();

  // `items` is missing on error payloads and empty for unknown ids.
  const video = json?.items?.[0];
  if (!video) {
    throw new Error(`No YouTube video found for id "${id}"`);
  }

  return video;
}
|
@ -7,7 +7,9 @@ import * as openai from "@lib/openai.ts";
|
||||
|
||||
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
|
||||
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
|
||||
import { createArticle } from "@lib/resource/articles.ts";
|
||||
import { Article, createArticle } from "@lib/resource/articles.ts";
|
||||
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
||||
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";
|
||||
|
||||
const parser = new DOMParser();
|
||||
|
||||
@ -124,6 +126,47 @@ async function processCreateArticle(
|
||||
streamResponse.enqueue("id: " + newArticle.id);
|
||||
}
|
||||
|
||||
/**
 * Creates an article from a YouTube video URL.
 *
 * Extracts the video id from `fetchUrl`, loads the video through
 * `getYoutubeVideoDetails`, asks `openai.shortenTitle` for a shortened title
 * to use as the article id, stores the article with `createArticle`, and
 * reports each step on `streamResponse`.
 *
 * @param fetchUrl - URL of the YouTube video.
 * @param streamResponse - Stream that receives the progress messages and, as
 * the last message, `id: <article id>`.
 * @throws Error when no video data is available for the URL; errors raised by
 * the called helpers propagate unchanged.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const id = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(id);
  // Runtime guard: the lookup can come back empty for unknown or private
  // videos even though its declared return type is non-nullable.
  if (!video?.snippet) {
    throw new Error(`no youtube video found for id "${id}"`);
  }
  const { snippet } = video;

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(snippet.title);

  const newArticle: Article = {
    name: snippet.title,
    // Fall back to the full title when no shortened title comes back.
    id: newId || snippet.title,
    content: snippet.description,
    // Guard against a missing `tags` field before slicing; without it this
    // line throws when the video carries no tags.
    tags: (snippet.tags ?? []).slice(0, 5),
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: snippet.channelTitle,
      date: new Date(snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");

  await createArticle(newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + newArticle.id);
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
GET(req) {
|
||||
const url = new URL(req.url);
|
||||
@ -135,13 +178,25 @@ export const handler: Handlers = {
|
||||
|
||||
const streamResponse = createStreamResponse();
|
||||
|
||||
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
|
||||
console.log({ article });
|
||||
}).catch((err) => {
|
||||
console.log(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
});
|
||||
if (isYoutubeLink(fetchUrl)) {
|
||||
processCreateYoutubeVideo({ fetchUrl, streamResponse }).then(
|
||||
(article) => {
|
||||
console.log({ article });
|
||||
},
|
||||
).catch((err) => {
|
||||
console.log(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
});
|
||||
} else {
|
||||
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
|
||||
console.log({ article });
|
||||
}).catch((err) => {
|
||||
console.log(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
});
|
||||
}
|
||||
|
||||
return streamResponse.response;
|
||||
},
|
||||
|
@ -3,8 +3,9 @@ import { MainLayout } from "@components/layouts/main.tsx";
|
||||
import { Article, getArticle } from "@lib/resource/articles.ts";
|
||||
import { RecipeHero } from "@components/RecipeHero.tsx";
|
||||
import { KMenu } from "@islands/KMenu.tsx";
|
||||
import { isYoutubeLink, YoutubePlayer } from "@components/Youtube.tsx";
|
||||
import { YoutubePlayer } from "@components/Youtube.tsx";
|
||||
import { HashTags } from "@components/HashTags.tsx";
|
||||
import { isYoutubeLink } from "@lib/string.ts";
|
||||
|
||||
export const handler: Handlers<Article | null> = {
|
||||
async GET(_, ctx) {
|
||||
|
@ -31,7 +31,13 @@ export default function Greet(props: PageProps<Article[] | null>) {
|
||||
<KMenu type="main" context={false} />
|
||||
<Grid>
|
||||
{props.data?.map((doc) => {
|
||||
return <Card link={`/articles/${doc.id}`} title={doc.name} />;
|
||||
return (
|
||||
<Card
|
||||
image={"/placeholder.svg"}
|
||||
link={`/articles/${doc.id}`}
|
||||
title={doc.name}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</Grid>
|
||||
</MainLayout>
|
||||
|
@ -22,6 +22,7 @@ export default function Greet(props: PageProps<Movie>) {
|
||||
<RecipeHero
|
||||
data={movie}
|
||||
subline={[author, date.toString()]}
|
||||
editLink={`https://notes.max-richter.dev/Media/movies/${movie.id}`}
|
||||
backlink="/movies"
|
||||
/>
|
||||
<KMenu type="main" context={movie} />
|
||||
|
Loading…
x
Reference in New Issue
Block a user