diff --git a/components/icons.tsx b/components/icons.tsx index abaa363..6d54cbc 100644 --- a/components/icons.tsx +++ b/components/icons.tsx @@ -17,3 +17,4 @@ export { default as IconSearch } from "https://deno.land/x/tabler_icons_tsx@0.0. export { default as IconGhost } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/ghost.tsx"; export { default as IconBrandYoutube } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/brand-youtube.tsx"; export { default as IconWand } from "https://deno.land/x/tabler_icons_tsx@0.0.5/tsx/wand.tsx"; +export { default as IconAlertCircle } from "https://deno.land/x/tabler_icons_tsx@0.0.5/tsx/alert-circle.tsx"; \ No newline at end of file diff --git a/deno.json b/deno.json index 697a747..a32da3f 100644 --- a/deno.json +++ b/deno.json @@ -48,6 +48,7 @@ "camelcase-css": "npm:camelcase-css", "thumbhash": "npm:thumbhash@^0.1.1", "tsx": "npm:tsx@^4.19.2", + "turndown": "npm:turndown@^7.2.2", "yaml": "https://deno.land/std@0.197.0/yaml/mod.ts", "zod": "npm:zod@^3.24.1", "fs": "https://deno.land/std/fs/mod.ts" diff --git a/fresh.gen.ts b/fresh.gen.ts index e965132..275d6ef 100644 --- a/fresh.gen.ts +++ b/fresh.gen.ts @@ -11,6 +11,7 @@ import * as $admin_log_index from "./routes/admin/log/index.tsx"; import * as $admin_performance_index from "./routes/admin/performance/index.tsx"; import * as $api_articles_name_ from "./routes/api/articles/[name].ts"; import * as $api_articles_create_index from "./routes/api/articles/create/index.ts"; +import * as $api_articles_enhance_name_ from "./routes/api/articles/enhance/[name].ts"; import * as $api_articles_index from "./routes/api/articles/index.ts"; import * as $api_auth_callback from "./routes/api/auth/callback.ts"; import * as $api_auth_login from "./routes/api/auth/login.ts"; @@ -57,6 +58,7 @@ import * as $KMenu_commands_create_movie from "./islands/KMenu/commands/create_m import * as $KMenu_commands_create_recipe from "./islands/KMenu/commands/create_recipe.ts"; import * as $KMenu_commands_create_recommendations from "./islands/KMenu/commands/create_recommendations.ts"; import * as $KMenu_commands_create_series from "./islands/KMenu/commands/create_series.ts"; +import * as $KMenu_commands_enhance_article_infos from "./islands/KMenu/commands/enhance_article_infos.ts"; import * as $KMenu_types from "./islands/KMenu/types.ts"; import * as $Link from "./islands/Link.tsx"; import * as $Recommendations from "./islands/Recommendations.tsx"; @@ -74,6 +76,7 @@ const manifest = { "./routes/admin/performance/index.tsx": $admin_performance_index, "./routes/api/articles/[name].ts": $api_articles_name_, "./routes/api/articles/create/index.ts": $api_articles_create_index, + "./routes/api/articles/enhance/[name].ts": $api_articles_enhance_name_, "./routes/api/articles/index.ts": $api_articles_index, "./routes/api/auth/callback.ts": $api_auth_callback, "./routes/api/auth/login.ts": $api_auth_login, @@ -127,6 +130,8 @@ const manifest = { "./islands/KMenu/commands/create_recommendations.ts": $KMenu_commands_create_recommendations, "./islands/KMenu/commands/create_series.ts": $KMenu_commands_create_series, + "./islands/KMenu/commands/enhance_article_infos.ts": + $KMenu_commands_enhance_article_infos, "./islands/KMenu/types.ts": $KMenu_types, "./islands/Link.tsx": $Link, "./islands/Recommendations.tsx": $Recommendations, diff --git a/islands/KMenu.tsx b/islands/KMenu.tsx index a55ca5c..f2c09c6 100644 --- a/islands/KMenu.tsx +++ b/islands/KMenu.tsx @@ -21,7 +21,7 @@ const KMenuEntry = ( : "text-gray-400" }`} > - {entry?.icon && icons[entry.icon]({ class: "w-4 h-4 mr-1" })} + {entry?.icon && icons[entry.icon]({ class: "min-w-4 h-4 mr-1" })} {entry.title} ); @@ -168,13 +168,15 @@ export const KMenu = ( style={{ background: "#2B2930", color: "#818181" }} >
{(activeState.value === "normal" || activeState.value === "input") && @@ -198,12 +200,18 @@ export const KMenu = ( )} {activeState.value === "loading" && (
- + {loadingText.value || "Loading..."}
)} + {activeState.value === "error" && ( +
+ + {loadingText.value || "An error occurred"} +
+ )}
- {activeState.value === "normal" && + {(activeState.value === "normal" || activeState.value === "input") && (
= { main: { @@ -77,6 +78,7 @@ export const menus: Record = { createNewSeries, createNewRecipe, addMovieInfos, + enhanceArticleInfo, // updateAllRecommendations, ], }, diff --git a/islands/KMenu/commands/add_movie_infos.ts b/islands/KMenu/commands/add_movie_infos.ts index 11d02f2..aa9e415 100644 --- a/islands/KMenu/commands/add_movie_infos.ts +++ b/islands/KMenu/commands/add_movie_infos.ts @@ -8,39 +8,53 @@ export const addMovieInfos: MenuEntry = { meta: "", icon: "IconReportSearch", cb: async (state, context) => { - state.activeState.value = "loading"; - const movie = context as ReviewResource; + try { + state.activeState.value = "loading"; + const movie = context as ReviewResource; - const query = movie.name; + const query = movie.name; - const response = await fetch( - `/api/tmdb/query?q=${encodeURIComponent(query)}`, - ); + const response = await fetch( + `/api/tmdb/query?q=${encodeURIComponent(query)}`, + ); - const json = await response.json() as TMDBMovie[]; + if (!response.ok) { + throw new Error(await response.text()); + } - const menuID = `result/${movie.name}`; + const json = await response.json() as TMDBMovie[]; - state.menus[menuID] = { - title: "Select", - entries: json.map((m) => ({ - title: `${m.title} released ${m.release_date}`, - cb: async () => { - state.activeState.value = "loading"; - await fetch(`/api/movies/enhance/${movie.name}/`, { - method: "POST", - body: JSON.stringify({ tmdbId: m.id }), - }); - state.visible.value = false; - state.activeState.value = "normal"; - globalThis.location.reload(); - }, - })), - }; + const menuID = `result/${movie.name}`; - state.activeMenu.value = menuID; - state.commandInput.value = ""; - state.activeState.value = "normal"; + state.menus[menuID] = { + title: "Select", + entries: json.map((m) => ({ + title: `${m.title} released ${m.release_date}`, + cb: async () => { + try { + state.activeState.value = "loading"; + await fetch(`/api/movies/enhance/${movie.name}/`, { + method: "POST", + body: JSON.stringify({ tmdbId: m.id }), + }); + state.visible.value = false; + state.activeState.value = "normal"; + globalThis.location.reload(); + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } + }, + })), + }; + + state.activeMenu.value = menuID; + state.commandInput.value = ""; + state.activeState.value = "normal"; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } }, visible: () => { const loc = globalThis["location"]; diff --git a/islands/KMenu/commands/add_series_infos.ts b/islands/KMenu/commands/add_series_infos.ts index 9461547..f4bcbcf 100644 --- a/islands/KMenu/commands/add_series_infos.ts +++ b/islands/KMenu/commands/add_series_infos.ts @@ -8,39 +8,53 @@ export const addSeriesInfo: MenuEntry = { meta: "", icon: "IconReportSearch", cb: async (state, context) => { - state.activeState.value = "loading"; - const series = context as ReviewResource; + try { + state.activeState.value = "loading"; + const series = context as ReviewResource; - const query = series.name; + const query = series.name; - const response = await fetch( - `/api/tmdb/query?q=${encodeURIComponent(query)}&type=serie`, - ); + const response = await fetch( + `/api/tmdb/query?q=${encodeURIComponent(query)}&type=serie`, + ); - const json = await response.json() as TMDBSeries[]; + if (!response.ok) { + throw new Error(await response.text()); + } - const menuID = `result/${series.name}`; + const json = await response.json() as TMDBSeries[]; - state.menus[menuID] = { - title: "Select", - entries: json.map((m) => ({ - title: `${m.name || m.original_name} released ${m.first_air_date}`, - cb: async () => { - state.activeState.value = "loading"; - await fetch(`/api/series/enhance/${series.name}/`, { - method: "POST", - body: JSON.stringify({ tmdbId: m.id }), - }); - state.visible.value = false; - state.activeState.value = "normal"; - //window.location.reload(); - }, - })), - }; + const menuID = `result/${series.name}`; - state.commandInput.value = ""; - state.activeMenu.value = menuID; - state.activeState.value = "normal"; + state.menus[menuID] = { + title: "Select", + entries: json.map((m) => ({ + title: `${m.name || m.original_name} released ${m.first_air_date}`, + cb: async () => { + try { + state.activeState.value = "loading"; + await fetch(`/api/series/enhance/${series.name}/`, { + method: "POST", + body: JSON.stringify({ tmdbId: m.id }), + }); + state.visible.value = false; + state.activeState.value = "normal"; + //window.location.reload(); + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } + }, + })), + }; + + state.commandInput.value = ""; + state.activeMenu.value = menuID; + state.activeState.value = "normal"; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } }, visible: () => { const loc = globalThis["location"]; diff --git a/islands/KMenu/commands/create_article.ts b/islands/KMenu/commands/create_article.ts index 87852ae..aca67c5 100644 --- a/islands/KMenu/commands/create_article.ts +++ b/islands/KMenu/commands/create_article.ts @@ -22,14 +22,16 @@ export const createNewArticle: MenuEntry = { state.activeState.value = "loading"; fetchStream("/api/articles/create?url=" + value, (chunk) => { - if (chunk.startsWith("id:")) { + if (chunk.type === "error") { + state.activeState.value = "error"; + state.loadingText.value = chunk.message; + } else if (chunk.type === "finished") { state.loadingText.value = "Finished"; setTimeout(() => { - window.location.href = "/articles/" + - chunk.replace("id:", "").trim(); + globalThis.location.href = "/articles/" + chunk.url; }, 500); } else { - state.loadingText.value = chunk; + state.loadingText.value = chunk.message; } }); } diff --git a/islands/KMenu/commands/create_movie.ts b/islands/KMenu/commands/create_movie.ts index d2e2b8b..0496c74 100644 --- a/islands/KMenu/commands/create_movie.ts +++ b/islands/KMenu/commands/create_movie.ts @@ -31,35 +31,52 @@ export const createNewMovie: MenuEntry = { let currentQuery: string; const search = debounce(async function search(query: string) { - currentQuery = query; - if (query.length < 2) { - return; + try { + currentQuery = query; + if (query.length < 2) { + return; + } + + const response = await fetch("/api/tmdb/query?q=" + query); + + if (!response.ok) { + throw new Error(await response.text()); + } + + const movies = await response.json() as TMDBMovie[]; + + if (query !== currentQuery) return; + + state.menus["input_link"] = { + title: "Search", + entries: movies.map((r) => { + return { + title: `${r.title} - ${r.release_date}`, + cb: async () => { + try { + state.activeState.value = "loading"; + const response = await fetch("/api/movies/" + r.id, { + method: "POST", + }); + if (!response.ok) { + throw new Error(await response.text()); + } + const movie = await response.json() as ReviewResource; + unsub(); + globalThis.location.href = "/movies/" + movie.name; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } + }, + }; + }), + }; + state.activeMenu.value = "input_link"; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; } - - const response = await fetch("/api/tmdb/query?q=" + query); - - const movies = await response.json() as TMDBMovie[]; - - if (query !== currentQuery) return; - - state.menus["input_link"] = { - title: "Search", - entries: movies.map((r) => { - return { - title: `${r.title} - ${r.release_date}`, - cb: async () => { - state.activeState.value = "loading"; - const response = await fetch("/api/movies/" + r.id, { - method: "POST", - }); - const movie = await response.json() as ReviewResource; - unsub(); - globalThis.location.href = "/movies/" + movie.name; - }, - }; - }), - }; - state.activeMenu.value = "input_link"; }, 500); const unsub = state.commandInput.subscribe((value) => { diff --git a/islands/KMenu/commands/create_recipe.ts b/islands/KMenu/commands/create_recipe.ts index 0464faf..c1a8f02 100644 --- a/islands/KMenu/commands/create_recipe.ts +++ b/islands/KMenu/commands/create_recipe.ts @@ -21,15 +21,17 @@ export const createNewRecipe: MenuEntry = { state.activeState.value = "loading"; - fetchStream("/api/recipes/create?url=" + value, (chunk) => { - if (chunk.startsWith("id:")) { + fetchStream("/api/recipes/create?url=" + value, (msg) => { + if (msg.type === "error") { + state.activeState.value = "error"; + state.loadingText.value = msg.message; + } else if (msg.type === "finished") { state.loadingText.value = "Finished"; setTimeout(() => { - globalThis.location.href = "/recipes/" + - chunk.replace("id:", "").trim(); + globalThis.location.href = "/recipes/" + msg.url; }, 500); } else { - state.loadingText.value = chunk; + state.loadingText.value = msg.message; } }); } diff --git a/islands/KMenu/commands/create_recommendations.ts b/islands/KMenu/commands/create_recommendations.ts index 698ee57..b406e02 100644 --- a/islands/KMenu/commands/create_recommendations.ts +++ b/islands/KMenu/commands/create_recommendations.ts @@ -10,12 +10,15 @@ export const updateAllRecommendations: MenuEntry = { state.activeState.value = "loading"; fetchStream("/api/recommendation/all", (chunk) => { - if (chunk.toLowerCase().includes("finish")) { + if (chunk.type === "error") { + state.activeState.value = "error"; + state.loadingText.value = chunk.message; + } else if (chunk.type === "finished") { setTimeout(() => { - window.location.reload(); + globalThis.location.reload(); }, 500); } else { - state.loadingText.value = chunk; + state.loadingText.value = chunk.message; } }); }, diff --git a/islands/KMenu/commands/create_series.ts b/islands/KMenu/commands/create_series.ts index 7bd2f26..27a4f7c 100644 --- a/islands/KMenu/commands/create_series.ts +++ b/islands/KMenu/commands/create_series.ts @@ -31,42 +31,55 @@ export const createNewSeries: MenuEntry = { let currentQuery: string; const search = debounce(async function search(query: string) { - currentQuery = query; - if (query.length < 2) { - return; + try { + currentQuery = query; + if (query.length < 2) { + return; + } + + const response = await fetch( + "/api/tmdb/query?q=" + query + "&type=series", + ); + + if (!response.ok) { + throw new Error(await response.text()); + } + + const series = await response.json() as TMDBSeries[]; + + if (query !== currentQuery) return; + + state.menus["input_link"] = { + title: "Search", + entries: series.map((r) => { + return { + title: `${r.name} - ${r.first_air_date}`, + cb: async () => { + try { + state.activeState.value = "loading"; + const response = await fetch("/api/series/" + r.id, { + method: "POST", + }); + if (!response.ok) { + throw new Error(await response.text()); + } + const series = await response.json() as ReviewResource; + unsub(); + globalThis.location.href = "/series/" + series.name; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; + } + }, + }; + }), + }; + state.commandInput.value = ""; + state.activeMenu.value = "input_link"; + } catch (e) { + state.activeState.value = "error"; + state.loadingText.value = e.message; } - - const response = await fetch( - "/api/tmdb/query?q=" + query + "&type=series", - ); - - const series = await response.json() as TMDBSeries[]; - - if (query !== currentQuery) return; - - state.menus["input_link"] = { - title: "Search", - entries: series.map((r) => { - return { - title: `${r.name} - ${r.first_air_date}`, - cb: async () => { - try { - state.activeState.value = "loading"; - const response = await fetch("/api/series/" + r.id, { - method: "POST", - }); - const series = await response.json() as ReviewResource; - unsub(); - globalThis.location.href = "/series/" + series.name; - } catch (_e) { - state.activeState.value = "normal"; - } - }, - }; - }), - }; - state.commandInput.value = ""; - state.activeMenu.value = "input_link"; }, 500); const unsub = state.commandInput.subscribe((value) => { diff --git a/islands/KMenu/commands/enhance_article_infos.ts b/islands/KMenu/commands/enhance_article_infos.ts new file mode 100644 index 0000000..89a71c4 --- /dev/null +++ b/islands/KMenu/commands/enhance_article_infos.ts @@ -0,0 +1,41 @@ +import { getCookie } from "@lib/string.ts"; +import { MenuEntry } from "../types.ts"; +import { ArticleResource } from "@lib/marka/schema.ts"; +import { fetchStream } from "@lib/helpers.ts"; + +export const enhanceArticleInfo: MenuEntry = { + title: "Enhance Article Info", + meta: "Update metadata and content from source url", + icon: "IconReportSearch", + cb: (state, context) => { + state.activeState.value = "loading"; + const article = context as ArticleResource; + + fetchStream( + `/api/articles/enhance/${article.name}/`, + (chunk) => { + if (chunk.type === "error") { + state.activeState.value = "error"; + state.loadingText.value = chunk.message; + } else if (chunk.type == "finished") { + state.loadingText.value = "Finished"; + setTimeout(() => { + state.visible.value = false; + state.activeState.value = "normal"; + globalThis.location.reload(); + }, 500); + } else { + state.loadingText.value = chunk.message; + } + }, + { method: "POST" }, + ); + }, + visible: () => { + const loc = globalThis["location"]; + if (!getCookie("session_cookie")) return false; + + return (loc?.pathname?.includes("article") && + !loc.pathname.endsWith("articles")); + }, +}; diff --git a/lib/env.ts b/lib/env.ts index 7fd66dc..fa07de2 100644 --- a/lib/env.ts +++ b/lib/env.ts @@ -7,6 +7,7 @@ export const PROXY_PASSWORD = Deno.env.get("PROXY_PASSWORD"); export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY"); export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY"); export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY"); +export const UNSPLASH_API_KEY = Deno.env.get("UNSPLASH_API_KEY"); export const TELEGRAM_API_KEY = Deno.env.get("TELEGRAM_API_KEY")!; export const GITEA_SERVER = Deno.env.get("GITEA_SERVER"); diff --git a/lib/helpers.ts b/lib/helpers.ts index 959729e..ca8322b 100644 --- a/lib/helpers.ts +++ b/lib/helpers.ts @@ -31,19 +31,54 @@ export const fixRenderedMarkdown = (content: string) => { }); }; -export async function fetchStream(url: string, cb: (chunk: string) => void) { - const response = await fetch(url); - const reader = response?.body?.getReader(); - if (reader) { - while (true) { - const { done, value } = await reader.read(); - if (done) return; - const data = new TextDecoder().decode(value); - data - .split("$") - .filter((d) => d && d.length) - .map((d) => cb(Array.isArray(d) ? d[0] : d)); - } +type StreamMessage = { + type: "info"; + message: string; +} | { + type: "error"; + message: string; +} | { + type: "warning"; + message: string; +} | { + type: "finished"; + url: string; +}; + +export async function fetchStream( + url: string, + cb: (chunk: StreamMessage) => void, + init?: RequestInit, +) { + const res = await fetch(url, init); + if (!res.body) return; + + let buffer = ""; + const reader = res.body + .pipeThrough(new TextDecoderStream()) + .pipeThrough( + new TransformStream({ + transform(chunk, controller) { + buffer += chunk; + let idx; + while ((idx = buffer.indexOf("\n")) >= 0) { + const line = buffer.slice(0, idx).trim(); + buffer = buffer.slice(idx + 1); + if (line) controller.enqueue(line); + } + }, + flush(controller) { + const line = buffer.trim(); + if (line) controller.enqueue(line); + }, + }), + ) + .getReader(); + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + cb(JSON.parse(value)); } } @@ -58,32 +93,53 @@ export function hashString(message: string) { } export const createStreamResponse = () => { - let controller: ReadableStreamController; - const body = new ReadableStream({ - start(cont) { - controller = cont; + const encoder = new TextEncoder(); + let controller: ReadableStreamDefaultController; + + const body = new ReadableStream({ + start(c) { + controller = c; }, }); const response = new Response(body, { headers: { - "content-type": "text/plain", + // newline-delimited JSON + "content-type": "application/x-ndjson; charset=utf-8", + // prevent intermediaries from buffering/transforming + "cache-control": "no-cache, no-transform", "x-content-type-options": "nosniff", + // nginx hint to disable proxy buffering + "x-accel-buffering": "no", + // if you control compression, keep it off for streams + // "content-encoding": "identity", }, }); - function cancel() { - controller.close(); + const send = (obj: unknown) => { + controller.enqueue(encoder.encode(JSON.stringify(obj) + "\n")); // ← delimiter + }; + const cancel = () => controller.close(); + + function info(message: string) { + return send({ type: "info", message }); } - function enqueue(chunk: string) { - controller?.enqueue(new TextEncoder().encode("$" + chunk)); + function error(message: string) { + return send({ type: "error", message }); + } + + function warning(message: string) { + return send({ type: "warning", message }); } return { response, cancel, - enqueue, + send, + info, + error, + warning, }; }; diff --git a/lib/log/index.ts b/lib/log/index.ts index 03194e8..b51c032 100644 --- a/lib/log/index.ts +++ b/lib/log/index.ts @@ -38,13 +38,13 @@ export function createLogger(scope: string, _options?: LoggerOptions): Logger { export function loggerFromStream(stream: StreamResponse) { return { debug: (...data: unknown[]) => - stream.enqueue(`${data.length > 1 ? data.join(" ") : data[0]}`), + stream.info(`${data.length > 1 ? data.join(" ") : data[0]}`), info: (...data: unknown[]) => - stream.enqueue(`${data.length > 1 ? data.join(" ") : data[0]}`), + stream.info(`${data.length > 1 ? data.join(" ") : data[0]}`), error: (...data: unknown[]) => - stream.enqueue(`[ERROR]: ${data.length > 1 ? data.join(" ") : data[0]}`), + stream.error(`[ERROR]: ${data.length > 1 ? data.join(" ") : data[0]}`), warn: (...data: unknown[]) => - stream.enqueue(`[WARN]: ${data.length > 1 ? data.join(" ") : data[0]}`), + stream.warning(`[WARN]: ${data.length > 1 ? data.join(" ") : data[0]}`), }; } diff --git a/lib/marka/index.ts b/lib/marka/index.ts index 1b362a4..e121955 100644 --- a/lib/marka/index.ts +++ b/lib/marka/index.ts @@ -106,8 +106,11 @@ export async function createResource( body: isJson ? JSON.stringify(content) : content, }); if (!response.ok) { + const text = await response.text(); throw new Error( - `Failed to create resource (resources/${path}) : ${response.status}`, + `failed to create resource (resources/${path}): ${ + text || response.status + }`, ); } return response.json(); diff --git a/lib/openai.ts b/lib/openai.ts index 969db6b..e84b360 100644 --- a/lib/openai.ts +++ b/lib/openai.ts @@ -195,6 +195,23 @@ respond with a plain unordered list each item starting with the year the movie w return recommendations; }; +export async function createUnsplashSearchTerm(content: string) { + if (!openAI) return; + const chatCompletion = await openAI.chat.completions.create({ + model: model, + messages: [ + { + role: "system", + content: + "Please respond with a search term for unsplash for the following article", + }, + { role: "user", content: content.slice(0, 10_000) }, + ], + }); + + return chatCompletion.choices[0].message.content?.toLowerCase(); +} + export async function createTags(content: string) { if (!openAI) return; const chatCompletion = await openAI.chat.completions.create({ diff --git a/lib/playwright.ts b/lib/playwright.ts index a6de683..e05666d 100644 --- a/lib/playwright.ts +++ b/lib/playwright.ts @@ -9,7 +9,7 @@ export async function fetchHtmlWithPlaywright( fetchUrl: string, streamResponse: ReturnType, ): Promise { - streamResponse.enqueue("booting up playwright"); + streamResponse.info("booting up playwright"); const config: Parameters[0] = {}; if (env.PROXY_SERVER) { @@ -24,7 +24,7 @@ export async function fetchHtmlWithPlaywright( // Launch the Playwright browser const browser = await firefox.launch(config); - streamResponse.enqueue("fetching html"); + streamResponse.info("fetching html"); try { // Open a new browser context and page @@ -42,7 +42,7 @@ export async function fetchHtmlWithPlaywright( return html; } catch (error) { - streamResponse.enqueue("error fetching html"); + streamResponse.error("error fetching html"); console.error(error); return ""; } finally { diff --git a/lib/unsplash.ts b/lib/unsplash.ts new file mode 100644 index 0000000..0fc1167 --- /dev/null +++ b/lib/unsplash.ts @@ -0,0 +1,29 @@ +import { UNSPLASH_API_KEY } from "./env.ts"; + +const API_URL = "https://api.unsplash.com"; + +export async function getImageBySearchTerm( + searchTerm: string, +): Promise { + if (!UNSPLASH_API_KEY) { + throw new Error("UNSPLASH_API_KEY is not set"); + } + + const url = new URL("/search/photos", API_URL); + url.searchParams.append("query", searchTerm); + url.searchParams.append("per_page", "1"); + url.searchParams.append("orientation", "landscape"); + + const response = await fetch(url.toString(), { + headers: { + Authorization: `Client-ID ${UNSPLASH_API_KEY}`, + }, + }); + + if (!response.ok) { + throw new Error(`Unsplash API request failed: ${response.statusText}`); + } + + const data = await response.json(); + return data.results[0]?.urls?.regular; +} diff --git a/lib/webScraper.ts b/lib/webScraper.ts index 5fd85a6..a49e513 100644 --- a/lib/webScraper.ts +++ b/lib/webScraper.ts @@ -1,6 +1,8 @@ import { JSDOM } from "jsdom"; import { fetchHtmlWithPlaywright } from "./playwright.ts"; import { createStreamResponse } from "./helpers.ts"; +import { Defuddle } from "defuddle/node"; +import TurndownService from "turndown"; /** * Mutates the given JSDOM instance: rewrites all relevant URL-bearing attributes @@ -164,6 +166,8 @@ function absolutizeMetaRefresh(content: string, base: string): string { return `${delay}; url=${abs}`; } +const turndownService = new TurndownService(); + export async function webScrape( url: string, streamResponse: ReturnType, @@ -172,5 +176,12 @@ export async function webScrape( const html = await fetchHtmlWithPlaywright(url, streamResponse); const dom = new JSDOM(html); absolutizeDomUrls(dom, u.origin); - return dom; + + const result = await Defuddle(dom, url); + + return { + ...result, + dom, + markdown: turndownService.turndown(result.content), + }; } diff --git a/routes/api/articles/create/index.ts b/routes/api/articles/create/index.ts index d4bcef6..6597070 100644 --- a/routes/api/articles/create/index.ts +++ b/routes/api/articles/create/index.ts @@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node"; import { AccessDeniedError, BadRequestError } from "@lib/errors.ts"; import { createStreamResponse, isValidUrl } from "@lib/helpers.ts"; import * as openai from "@lib/openai.ts"; +import * as unsplash from "@lib/unsplash.ts"; import { getYoutubeVideoDetails } from "@lib/youtube.ts"; import { extractYoutubeId, @@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts" const log = createLogger("api/article"); +async function getUnsplashCoverImage( + content: string, + streamResponse: ReturnType, +): Promise { + try { + streamResponse.info("creating unsplash search term"); + const searchTerm = await openai.createUnsplashSearchTerm(content); + if (!searchTerm) return; + streamResponse.info(`searching for ${searchTerm}`); + const unsplashUrl = await unsplash.getImageBySearchTerm(searchTerm); + return unsplashUrl; + } catch (e) { + log.error("Failed to get unsplash cover image", e); + return undefined; + } +} + +function ext(str: string) { + try { + const u = new URL(str); + if (u.searchParams.has("fm")) { + return u.searchParams.get("fm")!; + } + return fileExtension(u.pathname); + } catch (_e) { + return fileExtension(str); + } +} + async function fetchAndStoreCover( imageUrl: string | undefined, title: string, @@ -26,12 +56,12 @@ async function fetchAndStoreCover( ): Promise { if (!imageUrl) return; const imagePath = `articles/images/${safeFileName(title)}_cover.${ - fileExtension(imageUrl) + ext(imageUrl) }`; try { - streamResponse?.enqueue("downloading image"); + streamResponse?.info("downloading image"); const res = await fetch(imageUrl); - streamResponse?.enqueue("saving image"); + streamResponse?.info("saving image"); if (!res.ok) { console.log(`Failed to download remote image: ${imageUrl}`, res.status); return; @@ -53,38 +83,43 @@ async function processCreateArticle( ) { log.info("create article from url", { url: fetchUrl }); - streamResponse.enqueue("downloading article"); + streamResponse.info("downloading article"); - const doc = await webScrape(fetchUrl, streamResponse); + const result = await webScrape(fetchUrl, streamResponse); - const result = await Defuddle(doc, fetchUrl, { - markdown: true, - }); + log.debug("downloaded and parse parsed", result); - log.debug("downloaded and parse parsed", { - ...result, - url: fetchUrl, - content: result.content.slice(0, 200), - }); + streamResponse.info("parsed article, creating tags with openai"); - streamResponse.enqueue("parsed article, creating tags with openai"); + const aiMeta = await openai.extractArticleMetadata(result.markdown); - const aiMeta = await openai.extractArticleMetadata(result.content); - - streamResponse.enqueue("postprocessing article"); + streamResponse.info("postprocessing article"); const title = result?.title || aiMeta?.headline || ""; - const coverImagePath = await fetchAndStoreCover( - result.image, - title, - streamResponse, - ); + let coverImagePath: string | undefined = undefined; + if (result?.image?.length) { + log.debug("using local image for cover image", { image: result.image }); + coverImagePath = await fetchAndStoreCover( + result.image, + title, + streamResponse, + ); + } else { + const urlPath = await getUnsplashCoverImage( + result.markdown, + streamResponse, + ); + coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse); + log.debug("using unsplash for cover image", { image: coverImagePath }); + } + + const url = toUrlSafeString(title); const newArticle: ArticleResource["content"] = { _type: "Article", headline: title, - articleBody: result.content, + articleBody: result.markdown, url: fetchUrl, datePublished: formatDate( result?.published || aiMeta?.datePublished || undefined, @@ -100,16 +135,16 @@ async function processCreateArticle( }, } as const; - streamResponse.enqueue("writing to disk"); + streamResponse.info("writing to disk"); log.debug("writing to disk", { ...newArticle, articleBody: newArticle.articleBody?.slice(0, 200), }); - await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle); + await createResource(`articles/${url}.md`, newArticle); - streamResponse.enqueue("id: " + title); + streamResponse.send({ type: "finished", url }); } async function processCreateYoutubeVideo( @@ -122,13 +157,13 @@ async function processCreateYoutubeVideo( url: fetchUrl, }); - streamResponse.enqueue("getting video infos from youtube api"); + streamResponse.info("getting video infos from youtube api"); const youtubeId = extractYoutubeId(fetchUrl); const video = await getYoutubeVideoDetails(youtubeId); - streamResponse.enqueue("shortening title with openai"); + streamResponse.info("shortening title with openai"); const videoTitle = await openai.shortenTitle(video.snippet.title) || video.snippet.title; @@ -152,16 +187,18 @@ async function processCreateYoutubeVideo( }, }; - streamResponse.enqueue("creating article"); + streamResponse.info("creating article"); + + const filename = toUrlSafeString(videoTitle); await createResource( - `articles/${toUrlSafeString(videoTitle)}.md`, + `articles/${filename}.md`, newArticle, ); - streamResponse.enqueue("finished"); + streamResponse.info("finished"); - streamResponse.enqueue("id: " + toUrlSafeString(videoTitle)); + streamResponse.send({ type: "finished", url: filename }); } export const handler: Handlers = { diff --git a/routes/api/articles/enhance/[name].ts b/routes/api/articles/enhance/[name].ts new file mode 100644 index 0000000..1acbc96 --- /dev/null +++ b/routes/api/articles/enhance/[name].ts @@ -0,0 +1,191 @@ +import { FreshContext, Handlers } from "$fresh/server.ts"; +import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"; +import { formatDate, safeFileName } from "@lib/string.ts"; +import { createStreamResponse } from "@lib/helpers.ts"; +import { + AccessDeniedError, + BadRequestError, + NotFoundError, +} from "@lib/errors.ts"; +import { createResource, fetchResource } from "@lib/marka/index.ts"; +import { ArticleResource } from "@lib/marka/schema.ts"; +import { webScrape } from "@lib/webScraper.ts"; +import * as openai from "@lib/openai.ts"; +import * as unsplash from "@lib/unsplash.ts"; +import { createLogger } from "@lib/log/index.ts"; + +function ext(str: string) { + try { + const u = new URL(str); + if (u.searchParams.has("fm")) { + return u.searchParams.get("fm")!; + } + return fileExtension(u.pathname); + } catch (_e) { + return fileExtension(str); + } +} + +const log = createLogger("api/article/enhance"); + +async function getUnsplashCoverImage( + content: string, + streamResponse: ReturnType, +): Promise { + try { + streamResponse.info("creating unsplash search term"); + const searchTerm = await openai.createUnsplashSearchTerm(content); + if (!searchTerm) return; + streamResponse.info(`searching for ${searchTerm}`); + const unsplashUrl = await unsplash.getImageBySearchTerm(searchTerm); + return unsplashUrl; + } catch (e) { + log.error("Failed to get unsplash cover image", e); + return undefined; + } +} + +async function fetchAndStoreCover( + imageUrl: string | undefined, + title: string, + streamResponse: ReturnType, +): Promise { + if (!imageUrl) return; + const imagePath = `articles/images/${safeFileName(title)}_cover.${ + ext(imageUrl) + }`; + try { + streamResponse.info("downloading cover"); + const res = await fetch(imageUrl); + if (!res.ok) { + log.error(`Failed to download remote image: ${imageUrl}`, { + status: res.status, + }); + return; + } + const buffer = await res.arrayBuffer(); + streamResponse.info("saving cover"); + await createResource(imagePath, buffer); + return `resources/${imagePath}`; + } catch (err) { + log.error(`Failed to save image: ${imageUrl}`, err); + return; + } +} + +async function processEnhanceArticle( + name: string, + streamResponse: ReturnType, +) { + const article = await fetchResource( + `articles/${name}`, + ); + if (!article) { + throw new NotFoundError(); + } + + const fetchUrl = article.content?.url; + if (!fetchUrl) { + throw new BadRequestError("Article has no URL to enhance from."); + } + + log.info("enhancing article from url", { url: fetchUrl }); + streamResponse.info("scraping url"); + const result = await webScrape(fetchUrl, streamResponse); + + streamResponse.info("parsing content"); + + log.debug("downloaded and parsed", result); + + streamResponse.info("extracting metadata with openai"); + const aiMeta = await openai.extractArticleMetadata(result.markdown); + + const title = result?.title || aiMeta?.headline || + article.content?.headline || ""; + + article.content ??= { + _type: "Article", + headline: title, + url: fetchUrl, + }; + + article.content.articleBody = result.markdown; + article.content.datePublished ??= formatDate( + result?.published || aiMeta?.datePublished || undefined, + ); + + if (!article.content.author?.name || article.content.author.name === "") { + article.content.author = { + _type: "Person", + name: (result.schemaOrgData?.author?.name || aiMeta?.author || "") + .replace( + "@", + "twitter:", + ), + }; + } + + if (!article.content.image) { + let coverImagePath: string | undefined = undefined; + if (result?.image?.length) { + log.debug("using local image for cover image", { image: result.image }); + coverImagePath = await fetchAndStoreCover( + result.image, + title, + streamResponse, + ); + } else { + const urlPath = await getUnsplashCoverImage( + result.content, + streamResponse, + ); + coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse); + log.debug("using unsplash for cover image", { image: coverImagePath }); + } + if (coverImagePath) { + article.content.image = coverImagePath; + } + } + + log.debug("writing to disk", { + name: name, + article: { + ...article, + content: { + ...article.content, + articleBody: article.content.articleBody?.slice(0, 200), + }, + }, + }); + + streamResponse.info("writing to disk"); + await createResource(`articles/${name}`, article.content); + streamResponse.send({ type: "finished", url: name.replace(/$\.md/, "") }); +} + +const POST = ( + _req: Request, + ctx: FreshContext, +): Response => { + const session = ctx.state.session; + if (!session) { + throw new AccessDeniedError(); + } + + const streamResponse = createStreamResponse(); + + processEnhanceArticle(ctx.params.name, streamResponse) + .catch((err) => { + log.error(err); + streamResponse.error(err.message); + }) + .finally(() => { + streamResponse.cancel(); + }); + + return streamResponse.response; +}; + +export const handler: Handlers = { + POST, +}; diff --git a/routes/api/index.ts b/routes/api/index.ts index fdf544d..a4dcaa8 100644 --- a/routes/api/index.ts +++ b/routes/api/index.ts @@ -2,7 +2,7 @@ import { Handlers } from "$fresh/server.ts"; import { json } from "@lib/helpers.ts"; export const handler: Handlers = { - async GET() { + GET() { return json([]); }, }; diff --git a/routes/api/recipes/create/index.ts b/routes/api/recipes/create/index.ts index 3241968..5a5c518 100644 --- a/routes/api/recipes/create/index.ts +++ b/routes/api/recipes/create/index.ts @@ -10,7 +10,6 @@ import { parseJsonLdToRecipeSchema } from "./parseJsonLd.ts"; import z from "zod"; import { createResource } from "@lib/marka/index.ts"; import { webScrape } from "@lib/webScraper.ts"; -import { Defuddle } from "defuddle/node"; import { RecipeResource } from "@lib/marka/schema.ts"; const log = createLogger("api/article"); @@ -23,18 +22,14 @@ async function processCreateRecipeFromUrl( ) { log.info("create article from url", { url: fetchUrl }); - streamResponse.enqueue("downloading article"); + streamResponse.info("downloading article"); - const doc = await webScrape(fetchUrl, streamResponse); + const result = await webScrape(fetchUrl, streamResponse); - const result = await Defuddle(doc, fetchUrl, { - markdown: true, - }); - - streamResponse.enqueue("download success"); + streamResponse.info("download success"); const jsonLds = Array.from( - doc?.querySelectorAll( + result.dom?.querySelectorAll( "script[type='application/ld+json']", ), ) as unknown as HTMLScriptElement[]; @@ -48,11 +43,11 @@ async function processCreateRecipeFromUrl( } if (!recipe) { - const res = await openai.extractRecipe(result.content); + const res = await openai.extractRecipe(result.markdown); if (!res || "errorMessages" in res) { const errorMessage = res?.errorMessages?.[0] || "could not extract recipe"; - streamResponse.enqueue(`failed to extract recipe: ${errorMessage}`); + streamResponse.error(`failed to extract recipe: ${errorMessage}`); return; } recipe = res; @@ -61,7 +56,7 @@ async function processCreateRecipeFromUrl( const id = toUrlSafeString(recipe?.name || ""); if (!recipe) { - streamResponse.enqueue("failed to parse recipe"); + streamResponse.error("failed to parse recipe"); streamResponse.cancel(); return; } @@ -80,11 +75,11 @@ async function processCreateRecipeFromUrl( const finalPath = `resources/recipes/images/${ safeFileName(id) }_cover.${extension}`; - streamResponse.enqueue("downloading image"); + streamResponse.info("downloading image"); try { - streamResponse.enqueue("downloading image"); + streamResponse.info("downloading image"); const res = await fetch(newRecipe.image); - streamResponse.enqueue("saving image"); + streamResponse.info("saving image"); const buffer = await res.arrayBuffer(); await createResource(finalPath, buffer); newRecipe.image = finalPath; @@ -93,11 +88,11 @@ async function processCreateRecipeFromUrl( } } - streamResponse.enqueue("finished processing, creating file"); + streamResponse.info("finished processing, creating file"); await createResource(`recipes/${id}.md`, newRecipe); - streamResponse.enqueue("id: " + id); + streamResponse.send({ type: "finished", url: id }); } export const handler: Handlers = { @@ -119,7 +114,7 @@ export const handler: Handlers = { processCreateRecipeFromUrl({ fetchUrl, streamResponse }).then((article) => { log.debug("created article from link", { article }); }).catch((err) => { - streamResponse.enqueue(`error creating recipe: ${err}`); + streamResponse.error(`creating recipe: ${err}`); log.error(err); }).finally(() => { streamResponse.cancel(); diff --git a/routes/api/recommendation/all.ts b/routes/api/recommendation/all.ts index aa4b6f3..3188a34 100644 --- a/routes/api/recommendation/all.ts +++ b/routes/api/recommendation/all.ts @@ -21,7 +21,7 @@ async function processUpdateRecommendations( return true; }) as ReviewResource[]; - streamResponse.enqueue("Fetched all movies"); + streamResponse.info("fetched all movies"); let done = 0; const total = movies.length; @@ -41,7 +41,7 @@ async function processUpdateRecommendations( console.log(err); } done++; - streamResponse.enqueue( + streamResponse.info( `${Math.floor((done / total) * 100)}% [${ done + 1 }/${total}] ${movie.name}`, @@ -50,7 +50,7 @@ async function processUpdateRecommendations( console.log(err); }); - streamResponse.enqueue("100% Finished"); + streamResponse.info("100% Finished"); } export const handler: Handlers = { diff --git a/routes/articles/[name].tsx b/routes/articles/[name].tsx index 60a0899..b2ed563 100644 --- a/routes/articles/[name].tsx +++ b/routes/articles/[name].tsx @@ -50,7 +50,7 @@ export default function Greet( context={article} > - +