feat: add ability to scrape youtube video
This commit is contained in:
parent
cebbb8af2b
commit
ba853342bd
@ -1,30 +1,10 @@
|
|||||||
export const isYoutubeLink = (link: string) => {
|
import { extractYoutubeId } from "@lib/string.ts";
|
||||||
try {
|
|
||||||
const url = new URL(link);
|
|
||||||
return ["youtu.be", "youtube.com"].includes(url.hostname);
|
|
||||||
} catch (err) {
|
|
||||||
console.log(err);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
function extractYoutubeId(link: string) {
|
|
||||||
const url = new URL(link);
|
|
||||||
if (url.searchParams.has("v")) {
|
|
||||||
const id = url.searchParams.get("v");
|
|
||||||
|
|
||||||
if (id?.length && id.length > 4) {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return url.pathname.replace(/^\//, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
export const YoutubePlayer = ({ link }: { link: string }) => {
|
export const YoutubePlayer = ({ link }: { link: string }) => {
|
||||||
const id = extractYoutubeId(link);
|
const id = extractYoutubeId(link);
|
||||||
return (
|
return (
|
||||||
<iframe
|
<iframe
|
||||||
|
class="mb-6"
|
||||||
width="100%"
|
width="100%"
|
||||||
height="400px"
|
height="400px"
|
||||||
src={`https://www.youtube-nocookie.com/embed/${id}`}
|
src={`https://www.youtube-nocookie.com/embed/${id}`}
|
||||||
|
4
lib/cache/image.ts
vendored
4
lib/cache/image.ts
vendored
@ -69,8 +69,8 @@ export async function setImage(
|
|||||||
const pointerId = await hash(cacheKey);
|
const pointerId = await hash(cacheKey);
|
||||||
|
|
||||||
await cache.set(pointerId, clone);
|
await cache.set(pointerId, clone);
|
||||||
cache.expire(pointerId, 60 * 10);
|
cache.expire(pointerId, 60 * 60 * 24);
|
||||||
cache.expire(cacheKey, 60 * 10);
|
cache.expire(cacheKey, 60 * 60 * 24);
|
||||||
|
|
||||||
await cache.set(
|
await cache.set(
|
||||||
cacheKey,
|
cacheKey,
|
||||||
|
@ -3,3 +3,4 @@ export const REDIS_HOST = Deno.env.get("REDIS_HOST");
|
|||||||
export const REDIS_PASS = Deno.env.get("REDIS_PASS");
|
export const REDIS_PASS = Deno.env.get("REDIS_PASS");
|
||||||
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
|
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
|
||||||
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
|
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
|
||||||
|
export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
||||||
|
@ -55,8 +55,12 @@ export async function extractAuthorName(content: string) {
|
|||||||
|
|
||||||
const author = chatCompletion.choices[0].message.content;
|
const author = chatCompletion.choices[0].message.content;
|
||||||
|
|
||||||
if (author !== "not found") return author;
|
if (
|
||||||
return "";
|
author?.toLowerCase().includes("not") &&
|
||||||
|
author?.toLowerCase().includes("found")
|
||||||
|
) return "";
|
||||||
|
|
||||||
|
return author;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function createTags(content: string) {
|
export async function createTags(content: string) {
|
||||||
|
@ -30,3 +30,26 @@ export function extractHashTags(inputString: string) {
|
|||||||
|
|
||||||
return hashtags;
|
return hashtags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `link` is an absolute URL that points at YouTube.
 *
 * Accepts the short-link host `youtu.be`, the bare `youtube.com` domain and
 * any subdomain of it (`www.`, `m.`, `music.`, ...). Look-alike hosts such as
 * `notyoutube.com` or `youtube.com.example.org` are rejected because the
 * comparison is done on the parsed hostname, not on the raw string.
 *
 * @param link - Candidate URL string; may be empty or malformed.
 * @returns `true` for a YouTube host, `false` otherwise. Input that cannot be
 *   parsed as a URL yields `false` instead of throwing.
 */
export const isYoutubeLink = (link: string): boolean => {
  let hostname: string;
  try {
    hostname = new URL(link).hostname.toLowerCase();
  } catch {
    // A string that is not a URL is an ordinary "no" for a predicate, so it
    // is answered quietly rather than logged as an error.
    return false;
  }

  return (
    hostname === "youtu.be" ||
    hostname === "youtube.com" ||
    hostname.endsWith(".youtube.com")
  );
};
|
||||||
|
|
||||||
|
/** Path prefixes after which the video id is the next path segment. */
const YOUTUBE_ID_PATH_PREFIXES = new Set(["embed", "shorts", "live", "v"]);

/**
 * Extracts the video id from a YouTube URL.
 *
 * Resolution order:
 * 1. the `v` query parameter, when it is longer than four characters
 *    (`https://www.youtube.com/watch?v=ID`);
 * 2. the segment following a known path prefix
 *    (`/embed/ID`, `/shorts/ID`, `/live/ID`, `/v/ID`);
 * 3. otherwise the path itself without leading, trailing or duplicate
 *    slashes (`https://youtu.be/ID`).
 *
 * @param link - Absolute URL of the video.
 * @returns The extracted id; an empty string when the URL has no path and no
 *   usable `v` parameter.
 * @throws TypeError when `link` is not a valid absolute URL (raised by the
 *   `URL` constructor).
 */
export function extractYoutubeId(link: string): string {
  const url = new URL(link);

  const queryId = url.searchParams.get("v");
  if (queryId !== null && queryId.length > 4) {
    return queryId;
  }

  // Dropping empty segments removes the leading slash as well as any
  // trailing slash that would otherwise end up inside the id.
  const segments = url.pathname.split("/").filter(Boolean);
  const [first, second] = segments;
  if (
    first !== undefined &&
    second !== undefined &&
    YOUTUBE_ID_PATH_PREFIXES.has(first)
  ) {
    return second;
  }

  return segments.join("/");
}
|
||||||
|
86
lib/youtube.ts
Normal file
86
lib/youtube.ts
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import { YOUTUBE_API_KEY } from "@lib/env.ts";
|
||||||
|
|
||||||
|
const BASE_URL = "https://youtube.googleapis.com/youtube/v3/";
|
||||||
|
|
||||||
|
export interface APIResponse {
|
||||||
|
kind: string;
|
||||||
|
etag: string;
|
||||||
|
items: Item[];
|
||||||
|
pageInfo: PageInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Item {
|
||||||
|
kind: string;
|
||||||
|
etag: string;
|
||||||
|
id: string;
|
||||||
|
snippet: Snippet;
|
||||||
|
contentDetails: ContentDetails;
|
||||||
|
statistics: Statistics;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Snippet {
|
||||||
|
publishedAt: string;
|
||||||
|
channelId: string;
|
||||||
|
title: string;
|
||||||
|
description: string;
|
||||||
|
thumbnails: Thumbnails;
|
||||||
|
channelTitle: string;
|
||||||
|
tags: string[];
|
||||||
|
categoryId: string;
|
||||||
|
liveBroadcastContent: string;
|
||||||
|
localized: Localized;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Thumbnails {
|
||||||
|
default: Resolution;
|
||||||
|
medium: Resolution;
|
||||||
|
high: Resolution;
|
||||||
|
standard: Resolution;
|
||||||
|
maxres: Resolution;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Resolution {
|
||||||
|
url: string;
|
||||||
|
width: number;
|
||||||
|
height: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Localized {
|
||||||
|
title: string;
|
||||||
|
description: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ContentDetails {
|
||||||
|
duration: string;
|
||||||
|
dimension: string;
|
||||||
|
definition: string;
|
||||||
|
caption: string;
|
||||||
|
licensedContent: boolean;
|
||||||
|
contentRating: ContentRating;
|
||||||
|
projection: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ContentRating {}
|
||||||
|
|
||||||
|
export interface Statistics {
|
||||||
|
viewCount: string;
|
||||||
|
likeCount: string;
|
||||||
|
favoriteCount: string;
|
||||||
|
commentCount: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PageInfo {
|
||||||
|
totalResults: number;
|
||||||
|
resultsPerPage: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches snippet, content details and statistics for a single video from
 * the YouTube Data API `videos` endpoint.
 *
 * @param id - YouTube video id. It is passed as a query parameter value and
 *   therefore percent-encoded, so characters such as `&` cannot add or
 *   override other parameters of the request.
 * @returns The first item of the API response.
 * @throws Error when `YOUTUBE_API_KEY` is not configured, when the API
 *   answers with a non-2xx status, or when the response contains no item
 *   for `id`.
 */
export async function getYoutubeVideoDetails(
  id: string,
): Promise<Item> {
  const apiKey = YOUTUBE_API_KEY;
  if (!apiKey) {
    throw new Error("YOUTUBE_API_KEY is not set");
  }

  const endpoint = new URL("videos", BASE_URL);
  endpoint.searchParams.set("part", "snippet,contentDetails,statistics");
  endpoint.searchParams.set("id", id);
  endpoint.searchParams.set("key", apiKey);

  const response = await fetch(endpoint);
  if (!response.ok) {
    // Discard the unread body before bailing out; the message deliberately
    // omits the request URL because it contains the API key.
    await response.body?.cancel();
    throw new Error(
      `YouTube API request for video "${id}" failed with status ${response.status}`,
    );
  }

  const json: { items?: Item[] } = await response.json();
  const video = json.items?.[0];
  if (!video) {
    throw new Error(`YouTube API returned no video for id "${id}"`);
  }

  return video;
}
|
@ -7,7 +7,9 @@ import * as openai from "@lib/openai.ts";
|
|||||||
|
|
||||||
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
|
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
|
||||||
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
|
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
|
||||||
import { createArticle } from "@lib/resource/articles.ts";
|
import { Article, createArticle } from "@lib/resource/articles.ts";
|
||||||
|
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
|
||||||
|
import { extractYoutubeId, formatDate, isYoutubeLink } from "@lib/string.ts";
|
||||||
|
|
||||||
const parser = new DOMParser();
|
const parser = new DOMParser();
|
||||||
|
|
||||||
@ -124,6 +126,47 @@ async function processCreateArticle(
|
|||||||
streamResponse.enqueue("id: " + newArticle.id);
|
streamResponse.enqueue("id: " + newArticle.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates an article entry from a YouTube video URL.
 *
 * Steps, each announced on `streamResponse` before it runs:
 * 1. extract the video id from `fetchUrl` and load its details through
 *    `getYoutubeVideoDetails`;
 * 2. ask `openai.shortenTitle` for a short title that is used as the
 *    article id (falling back to the full video title);
 * 3. persist the article with `createArticle` and report its id.
 *
 * @param fetchUrl - URL of the YouTube video to import.
 * @param streamResponse - Stream used to report progress messages.
 * @throws Error when no video details are available for the URL; errors from
 *   the called helpers propagate unchanged.
 */
async function processCreateYoutubeVideo(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create youtube article from url", {
    url: fetchUrl,
  });

  streamResponse.enqueue("getting video infos from youtube api");

  const id = extractYoutubeId(fetchUrl);

  const video = await getYoutubeVideoDetails(id);
  if (!video) {
    // Fail with a readable message instead of a TypeError on `video.snippet`
    // when the lookup yields nothing for this id.
    throw new Error(`no youtube video found for id "${id}"`);
  }

  const { snippet } = video;

  streamResponse.enqueue("shortening title with openai");
  const newId = await openai.shortenTitle(snippet.title);

  const newArticle: Article = {
    name: snippet.title,
    id: newId || snippet.title,
    content: snippet.description,
    // The API leaves `tags` out entirely for videos without tags, so fall
    // back to an empty list rather than calling `slice` on undefined.
    tags: snippet.tags?.slice(0, 5) ?? [],
    meta: {
      status: "not-finished",
      link: fetchUrl,
      author: snippet.channelTitle,
      date: new Date(snippet.publishedAt),
    },
  };

  streamResponse.enqueue("creating article");

  await createArticle(newArticle);

  streamResponse.enqueue("finished");

  streamResponse.enqueue("id: " + newArticle.id);
}
|
||||||
|
|
||||||
export const handler: Handlers = {
|
export const handler: Handlers = {
|
||||||
GET(req) {
|
GET(req) {
|
||||||
const url = new URL(req.url);
|
const url = new URL(req.url);
|
||||||
@ -135,13 +178,25 @@ export const handler: Handlers = {
|
|||||||
|
|
||||||
const streamResponse = createStreamResponse();
|
const streamResponse = createStreamResponse();
|
||||||
|
|
||||||
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
|
if (isYoutubeLink(fetchUrl)) {
|
||||||
console.log({ article });
|
processCreateYoutubeVideo({ fetchUrl, streamResponse }).then(
|
||||||
}).catch((err) => {
|
(article) => {
|
||||||
console.log(err);
|
console.log({ article });
|
||||||
}).finally(() => {
|
},
|
||||||
streamResponse.cancel();
|
).catch((err) => {
|
||||||
});
|
console.log(err);
|
||||||
|
}).finally(() => {
|
||||||
|
streamResponse.cancel();
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
|
||||||
|
console.log({ article });
|
||||||
|
}).catch((err) => {
|
||||||
|
console.log(err);
|
||||||
|
}).finally(() => {
|
||||||
|
streamResponse.cancel();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
return streamResponse.response;
|
return streamResponse.response;
|
||||||
},
|
},
|
||||||
|
@ -3,8 +3,9 @@ import { MainLayout } from "@components/layouts/main.tsx";
|
|||||||
import { Article, getArticle } from "@lib/resource/articles.ts";
|
import { Article, getArticle } from "@lib/resource/articles.ts";
|
||||||
import { RecipeHero } from "@components/RecipeHero.tsx";
|
import { RecipeHero } from "@components/RecipeHero.tsx";
|
||||||
import { KMenu } from "@islands/KMenu.tsx";
|
import { KMenu } from "@islands/KMenu.tsx";
|
||||||
import { isYoutubeLink, YoutubePlayer } from "@components/Youtube.tsx";
|
import { YoutubePlayer } from "@components/Youtube.tsx";
|
||||||
import { HashTags } from "@components/HashTags.tsx";
|
import { HashTags } from "@components/HashTags.tsx";
|
||||||
|
import { isYoutubeLink } from "@lib/string.ts";
|
||||||
|
|
||||||
export const handler: Handlers<Article | null> = {
|
export const handler: Handlers<Article | null> = {
|
||||||
async GET(_, ctx) {
|
async GET(_, ctx) {
|
||||||
|
@ -31,7 +31,13 @@ export default function Greet(props: PageProps<Article[] | null>) {
|
|||||||
<KMenu type="main" context={false} />
|
<KMenu type="main" context={false} />
|
||||||
<Grid>
|
<Grid>
|
||||||
{props.data?.map((doc) => {
|
{props.data?.map((doc) => {
|
||||||
return <Card link={`/articles/${doc.id}`} title={doc.name} />;
|
return (
|
||||||
|
<Card
|
||||||
|
image={"/placeholder.svg"}
|
||||||
|
link={`/articles/${doc.id}`}
|
||||||
|
title={doc.name}
|
||||||
|
/>
|
||||||
|
);
|
||||||
})}
|
})}
|
||||||
</Grid>
|
</Grid>
|
||||||
</MainLayout>
|
</MainLayout>
|
||||||
|
@ -22,6 +22,7 @@ export default function Greet(props: PageProps<Movie>) {
|
|||||||
<RecipeHero
|
<RecipeHero
|
||||||
data={movie}
|
data={movie}
|
||||||
subline={[author, date.toString()]}
|
subline={[author, date.toString()]}
|
||||||
|
editLink={`https://notes.max-richter.dev/Media/movies/${movie.id}`}
|
||||||
backlink="/movies"
|
backlink="/movies"
|
||||||
/>
|
/>
|
||||||
<KMenu type="main" context={movie} />
|
<KMenu type="main" context={movie} />
|
||||||
|
Loading…
x
Reference in New Issue
Block a user