feat: fall back to Unsplash cover when article contains no image

This commit is contained in:
Max Richter
2025-11-09 23:52:53 +01:00
parent 6c6b69a46a
commit 655fc648e6
27 changed files with 687 additions and 224 deletions

View File

@@ -17,3 +17,4 @@ export { default as IconSearch } from "https://deno.land/x/tabler_icons_tsx@0.0.
export { default as IconGhost } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/ghost.tsx";
export { default as IconBrandYoutube } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/brand-youtube.tsx";
export { default as IconWand } from "https://deno.land/x/tabler_icons_tsx@0.0.5/tsx/wand.tsx";
export { default as IconAlertCircle } from "https://deno.land/x/tabler_icons_tsx@0.0.5/tsx/alert-circle.tsx";

View File

@@ -48,6 +48,7 @@
"camelcase-css": "npm:camelcase-css",
"thumbhash": "npm:thumbhash@^0.1.1",
"tsx": "npm:tsx@^4.19.2",
"turndown": "npm:turndown@^7.2.2",
"yaml": "https://deno.land/std@0.197.0/yaml/mod.ts",
"zod": "npm:zod@^3.24.1",
"fs": "https://deno.land/std/fs/mod.ts"

View File

@@ -11,6 +11,7 @@ import * as $admin_log_index from "./routes/admin/log/index.tsx";
import * as $admin_performance_index from "./routes/admin/performance/index.tsx";
import * as $api_articles_name_ from "./routes/api/articles/[name].ts";
import * as $api_articles_create_index from "./routes/api/articles/create/index.ts";
import * as $api_articles_enhance_name_ from "./routes/api/articles/enhance/[name].ts";
import * as $api_articles_index from "./routes/api/articles/index.ts";
import * as $api_auth_callback from "./routes/api/auth/callback.ts";
import * as $api_auth_login from "./routes/api/auth/login.ts";
@@ -57,6 +58,7 @@ import * as $KMenu_commands_create_movie from "./islands/KMenu/commands/create_m
import * as $KMenu_commands_create_recipe from "./islands/KMenu/commands/create_recipe.ts";
import * as $KMenu_commands_create_recommendations from "./islands/KMenu/commands/create_recommendations.ts";
import * as $KMenu_commands_create_series from "./islands/KMenu/commands/create_series.ts";
import * as $KMenu_commands_enhance_article_infos from "./islands/KMenu/commands/enhance_article_infos.ts";
import * as $KMenu_types from "./islands/KMenu/types.ts";
import * as $Link from "./islands/Link.tsx";
import * as $Recommendations from "./islands/Recommendations.tsx";
@@ -74,6 +76,7 @@ const manifest = {
"./routes/admin/performance/index.tsx": $admin_performance_index,
"./routes/api/articles/[name].ts": $api_articles_name_,
"./routes/api/articles/create/index.ts": $api_articles_create_index,
"./routes/api/articles/enhance/[name].ts": $api_articles_enhance_name_,
"./routes/api/articles/index.ts": $api_articles_index,
"./routes/api/auth/callback.ts": $api_auth_callback,
"./routes/api/auth/login.ts": $api_auth_login,
@@ -127,6 +130,8 @@ const manifest = {
"./islands/KMenu/commands/create_recommendations.ts":
$KMenu_commands_create_recommendations,
"./islands/KMenu/commands/create_series.ts": $KMenu_commands_create_series,
"./islands/KMenu/commands/enhance_article_infos.ts":
$KMenu_commands_enhance_article_infos,
"./islands/KMenu/types.ts": $KMenu_types,
"./islands/Link.tsx": $Link,
"./islands/Recommendations.tsx": $Recommendations,

View File

@@ -21,7 +21,7 @@ const KMenuEntry = (
: "text-gray-400"
}`}
>
{entry?.icon && icons[entry.icon]({ class: "w-4 h-4 mr-1" })}
{entry?.icon && icons[entry.icon]({ class: "min-w-4 h-4 mr-1" })}
{entry.title}
</div>
);
@@ -168,11 +168,13 @@ export const KMenu = (
style={{ background: "#2B2930", color: "#818181" }}
>
<div
class={`grid h-12 text-gray-400 ${
activeState.value !== "loading" && "border-b"
class={`grid min-h-12 text-gray-400 ${
(activeState.value === "normal" || activeState.value === "input") &&
"border-b"
} border-gray-500 `}
style={{
gridTemplateColumns: activeState.value !== "loading"
gridTemplateColumns:
(activeState.value === "normal" || activeState.value === "input")
? "auto 1fr"
: "1fr",
}}
@@ -198,12 +200,18 @@ export const KMenu = (
)}
{activeState.value === "loading" && (
<div class="py-3 px-4 flex items-center gap-2">
<icons.IconLoader2 class="animate-spin w-4 h-4" />
<icons.IconLoader2 class="animate-spin min-w-4 h-4" />
{loadingText.value || "Loading..."}
</div>
)}
{activeState.value === "error" && (
<div class="py-3 px-4 flex items-center gap-2 text-red-400">
<icons.IconAlertCircle class="min-w-4 h-4" />
{loadingText.value || "An error occurred"}
</div>
{activeState.value === "normal" &&
)}
</div>
{(activeState.value === "normal" || activeState.value === "input") &&
(
<div
class=""

View File

@@ -7,6 +7,7 @@ import { addSeriesInfo } from "@islands/KMenu/commands/add_series_infos.ts";
import { createNewSeries } from "@islands/KMenu/commands/create_series.ts";
import { updateAllRecommendations } from "@islands/KMenu/commands/create_recommendations.ts";
import { createNewRecipe } from "@islands/KMenu/commands/create_recipe.ts";
import { enhanceArticleInfo } from "@islands/KMenu/commands/enhance_article_infos.ts";
export const menus: Record<string, Menu> = {
main: {
@@ -77,6 +78,7 @@ export const menus: Record<string, Menu> = {
createNewSeries,
createNewRecipe,
addMovieInfos,
enhanceArticleInfo,
// updateAllRecommendations,
],
},

View File

@@ -8,6 +8,7 @@ export const addMovieInfos: MenuEntry = {
meta: "",
icon: "IconReportSearch",
cb: async (state, context) => {
try {
state.activeState.value = "loading";
const movie = context as ReviewResource;
@@ -17,6 +18,10 @@ export const addMovieInfos: MenuEntry = {
`/api/tmdb/query?q=${encodeURIComponent(query)}`,
);
if (!response.ok) {
throw new Error(await response.text());
}
const json = await response.json() as TMDBMovie[];
const menuID = `result/${movie.name}`;
@@ -26,6 +31,7 @@ export const addMovieInfos: MenuEntry = {
entries: json.map((m) => ({
title: `${m.title} released ${m.release_date}`,
cb: async () => {
try {
state.activeState.value = "loading";
await fetch(`/api/movies/enhance/${movie.name}/`, {
method: "POST",
@@ -34,6 +40,10 @@ export const addMovieInfos: MenuEntry = {
state.visible.value = false;
state.activeState.value = "normal";
globalThis.location.reload();
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
})),
};
@@ -41,6 +51,10 @@ export const addMovieInfos: MenuEntry = {
state.activeMenu.value = menuID;
state.commandInput.value = "";
state.activeState.value = "normal";
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
visible: () => {
const loc = globalThis["location"];

View File

@@ -8,6 +8,7 @@ export const addSeriesInfo: MenuEntry = {
meta: "",
icon: "IconReportSearch",
cb: async (state, context) => {
try {
state.activeState.value = "loading";
const series = context as ReviewResource;
@@ -17,6 +18,10 @@ export const addSeriesInfo: MenuEntry = {
`/api/tmdb/query?q=${encodeURIComponent(query)}&type=serie`,
);
if (!response.ok) {
throw new Error(await response.text());
}
const json = await response.json() as TMDBSeries[];
const menuID = `result/${series.name}`;
@@ -26,6 +31,7 @@ export const addSeriesInfo: MenuEntry = {
entries: json.map((m) => ({
title: `${m.name || m.original_name} released ${m.first_air_date}`,
cb: async () => {
try {
state.activeState.value = "loading";
await fetch(`/api/series/enhance/${series.name}/`, {
method: "POST",
@@ -34,6 +40,10 @@ export const addSeriesInfo: MenuEntry = {
state.visible.value = false;
state.activeState.value = "normal";
//window.location.reload();
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
})),
};
@@ -41,6 +51,10 @@ export const addSeriesInfo: MenuEntry = {
state.commandInput.value = "";
state.activeMenu.value = menuID;
state.activeState.value = "normal";
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
visible: () => {
const loc = globalThis["location"];

View File

@@ -22,14 +22,16 @@ export const createNewArticle: MenuEntry = {
state.activeState.value = "loading";
fetchStream("/api/articles/create?url=" + value, (chunk) => {
if (chunk.startsWith("id:")) {
if (chunk.type === "error") {
state.activeState.value = "error";
state.loadingText.value = chunk.message;
} else if (chunk.type === "finished") {
state.loadingText.value = "Finished";
setTimeout(() => {
window.location.href = "/articles/" +
chunk.replace("id:", "").trim();
globalThis.location.href = "/articles/" + chunk.url;
}, 500);
} else {
state.loadingText.value = chunk;
state.loadingText.value = chunk.message;
}
});
}

View File

@@ -31,6 +31,7 @@ export const createNewMovie: MenuEntry = {
let currentQuery: string;
const search = debounce(async function search(query: string) {
try {
currentQuery = query;
if (query.length < 2) {
return;
@@ -38,6 +39,10 @@ export const createNewMovie: MenuEntry = {
const response = await fetch("/api/tmdb/query?q=" + query);
if (!response.ok) {
throw new Error(await response.text());
}
const movies = await response.json() as TMDBMovie[];
if (query !== currentQuery) return;
@@ -48,18 +53,30 @@ export const createNewMovie: MenuEntry = {
return {
title: `${r.title} - ${r.release_date}`,
cb: async () => {
try {
state.activeState.value = "loading";
const response = await fetch("/api/movies/" + r.id, {
method: "POST",
});
if (!response.ok) {
throw new Error(await response.text());
}
const movie = await response.json() as ReviewResource;
unsub();
globalThis.location.href = "/movies/" + movie.name;
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
};
}),
};
state.activeMenu.value = "input_link";
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
}, 500);
const unsub = state.commandInput.subscribe((value) => {

View File

@@ -21,15 +21,17 @@ export const createNewRecipe: MenuEntry = {
state.activeState.value = "loading";
fetchStream("/api/recipes/create?url=" + value, (chunk) => {
if (chunk.startsWith("id:")) {
fetchStream("/api/recipes/create?url=" + value, (msg) => {
if (msg.type === "error") {
state.activeState.value = "error";
state.loadingText.value = msg.message;
} else if (msg.type === "finished") {
state.loadingText.value = "Finished";
setTimeout(() => {
globalThis.location.href = "/recipes/" +
chunk.replace("id:", "").trim();
globalThis.location.href = "/recipes/" + msg.url;
}, 500);
} else {
state.loadingText.value = chunk;
state.loadingText.value = msg.message;
}
});
}

View File

@@ -10,12 +10,15 @@ export const updateAllRecommendations: MenuEntry = {
state.activeState.value = "loading";
fetchStream("/api/recommendation/all", (chunk) => {
if (chunk.toLowerCase().includes("finish")) {
if (chunk.type === "error") {
state.activeState.value = "error";
state.loadingText.value = chunk.message;
} else if (chunk.type === "finished") {
setTimeout(() => {
window.location.reload();
globalThis.location.reload();
}, 500);
} else {
state.loadingText.value = chunk;
state.loadingText.value = chunk.message;
}
});
},

View File

@@ -31,6 +31,7 @@ export const createNewSeries: MenuEntry = {
let currentQuery: string;
const search = debounce(async function search(query: string) {
try {
currentQuery = query;
if (query.length < 2) {
return;
@@ -40,6 +41,10 @@ export const createNewSeries: MenuEntry = {
"/api/tmdb/query?q=" + query + "&type=series",
);
if (!response.ok) {
throw new Error(await response.text());
}
const series = await response.json() as TMDBSeries[];
if (query !== currentQuery) return;
@@ -55,11 +60,15 @@ export const createNewSeries: MenuEntry = {
const response = await fetch("/api/series/" + r.id, {
method: "POST",
});
if (!response.ok) {
throw new Error(await response.text());
}
const series = await response.json() as ReviewResource;
unsub();
globalThis.location.href = "/series/" + series.name;
} catch (_e) {
state.activeState.value = "normal";
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
},
};
@@ -67,6 +76,10 @@ export const createNewSeries: MenuEntry = {
};
state.commandInput.value = "";
state.activeMenu.value = "input_link";
} catch (e) {
state.activeState.value = "error";
state.loadingText.value = e.message;
}
}, 500);
const unsub = state.commandInput.subscribe((value) => {

View File

@@ -0,0 +1,41 @@
import { getCookie } from "@lib/string.ts";
import { MenuEntry } from "../types.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { fetchStream } from "@lib/helpers.ts";
/**
 * Command-menu entry that asks the server to re-process the article that is
 * currently open.
 *
 * `cb` POSTs to `/api/articles/enhance/<name>/` and mirrors every streamed
 * message into the menu's loading text. A "finished" message closes the menu
 * and reloads the page after a short delay; an "error" message, or a rejected
 * request/stream, switches the menu into its error state.
 *
 * `visible` only shows the entry when a `session_cookie` cookie is present
 * and the current path contains "article" but does not end with "articles".
 */
export const enhanceArticleInfo: MenuEntry = {
  title: "Enhance Article Info",
  meta: "Update metadata and content from source url",
  icon: "IconReportSearch",
  cb: (state, context) => {
    state.activeState.value = "loading";
    const article = context as ArticleResource;

    fetchStream(
      `/api/articles/enhance/${article.name}/`,
      (chunk) => {
        if (chunk.type === "error") {
          state.activeState.value = "error";
          state.loadingText.value = chunk.message;
        } else if (chunk.type === "finished") {
          state.loadingText.value = "Finished";
          // Give the user a moment to read "Finished" before reloading.
          setTimeout(() => {
            state.visible.value = false;
            state.activeState.value = "normal";
            globalThis.location.reload();
          }, 500);
        } else {
          state.loadingText.value = chunk.message;
        }
      },
      { method: "POST" },
    ).catch((e: unknown) => {
      // Without this handler a failed request or an unparsable stream line
      // would leave the menu stuck in the "loading" state.
      state.activeState.value = "error";
      state.loadingText.value = e instanceof Error ? e.message : String(e);
    });
  },
  visible: () => {
    const loc = globalThis["location"];
    if (!getCookie("session_cookie")) return false;

    return (loc?.pathname?.includes("article") &&
      !loc.pathname.endsWith("articles"));
  },
};

View File

@@ -7,6 +7,7 @@ export const PROXY_PASSWORD = Deno.env.get("PROXY_PASSWORD");
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
export const UNSPLASH_API_KEY = Deno.env.get("UNSPLASH_API_KEY");
export const TELEGRAM_API_KEY = Deno.env.get("TELEGRAM_API_KEY")!;
export const GITEA_SERVER = Deno.env.get("GITEA_SERVER");

View File

@@ -31,19 +31,54 @@ export const fixRenderedMarkdown = (content: string) => {
});
};
export async function fetchStream(url: string, cb: (chunk: string) => void) {
const response = await fetch(url);
const reader = response?.body?.getReader();
if (reader) {
/**
 * One message of a streamed response, as handed to the `fetchStream` callback.
 * The `type` field is the discriminant: "info", "error" and "warning" carry a
 * human-readable `message`, while "finished" carries the `url` of the
 * resulting resource instead.
 */
type StreamMessage = {
// progress update
type: "info";
message: string;
} | {
// the operation failed
type: "error";
message: string;
} | {
// non-fatal problem
type: "warning";
message: string;
} | {
// the operation completed; `url` identifies the resource it produced
type: "finished";
url: string;
};
export async function fetchStream(
url: string,
cb: (chunk: StreamMessage) => void,
init?: RequestInit,
) {
const res = await fetch(url, init);
if (!res.body) return;
let buffer = "";
const reader = res.body
.pipeThrough(new TextDecoderStream())
.pipeThrough(
new TransformStream<string, string>({
transform(chunk, controller) {
buffer += chunk;
let idx;
while ((idx = buffer.indexOf("\n")) >= 0) {
const line = buffer.slice(0, idx).trim();
buffer = buffer.slice(idx + 1);
if (line) controller.enqueue(line);
}
},
flush(controller) {
const line = buffer.trim();
if (line) controller.enqueue(line);
},
}),
)
.getReader();
while (true) {
const { done, value } = await reader.read();
if (done) return;
const data = new TextDecoder().decode(value);
data
.split("$")
.filter((d) => d && d.length)
.map((d) => cb(Array.isArray(d) ? d[0] : d));
}
if (done) break;
cb(JSON.parse(value));
}
}
@@ -58,32 +93,53 @@ export function hashString(message: string) {
}
export const createStreamResponse = () => {
let controller: ReadableStreamController<ArrayBufferView>;
const body = new ReadableStream({
start(cont) {
controller = cont;
const encoder = new TextEncoder();
let controller: ReadableStreamDefaultController<Uint8Array>;
const body = new ReadableStream<Uint8Array>({
start(c) {
controller = c;
},
});
const response = new Response(body, {
headers: {
"content-type": "text/plain",
// newline-delimited JSON
"content-type": "application/x-ndjson; charset=utf-8",
// prevent intermediaries from buffering/transforming
"cache-control": "no-cache, no-transform",
"x-content-type-options": "nosniff",
// nginx hint to disable proxy buffering
"x-accel-buffering": "no",
// if you control compression, keep it off for streams
// "content-encoding": "identity",
},
});
function cancel() {
controller.close();
const send = (obj: unknown) => {
controller.enqueue(encoder.encode(JSON.stringify(obj) + "\n")); // ← delimiter
};
const cancel = () => controller.close();
function info(message: string) {
return send({ type: "info", message });
}
function enqueue(chunk: string) {
controller?.enqueue(new TextEncoder().encode("$" + chunk));
function error(message: string) {
return send({ type: "error", message });
}
function warning(message: string) {
return send({ type: "warning", message });
}
return {
response,
cancel,
enqueue,
send,
info,
error,
warning,
};
};

View File

@@ -38,13 +38,13 @@ export function createLogger(scope: string, _options?: LoggerOptions): Logger {
export function loggerFromStream(stream: StreamResponse) {
return {
debug: (...data: unknown[]) =>
stream.enqueue(`${data.length > 1 ? data.join(" ") : data[0]}`),
stream.info(`${data.length > 1 ? data.join(" ") : data[0]}`),
info: (...data: unknown[]) =>
stream.enqueue(`${data.length > 1 ? data.join(" ") : data[0]}`),
stream.info(`${data.length > 1 ? data.join(" ") : data[0]}`),
error: (...data: unknown[]) =>
stream.enqueue(`[ERROR]: ${data.length > 1 ? data.join(" ") : data[0]}`),
stream.error(`[ERROR]: ${data.length > 1 ? data.join(" ") : data[0]}`),
warn: (...data: unknown[]) =>
stream.enqueue(`[WARN]: ${data.length > 1 ? data.join(" ") : data[0]}`),
stream.warning(`[WARN]: ${data.length > 1 ? data.join(" ") : data[0]}`),
};
}

View File

@@ -106,8 +106,11 @@ export async function createResource(
body: isJson ? JSON.stringify(content) : content,
});
if (!response.ok) {
const text = await response.text();
throw new Error(
`Failed to create resource (resources/${path}) : ${response.status}`,
`failed to create resource (resources/${path}): ${
text || response.status
}`,
);
}
return response.json();

View File

@@ -195,6 +195,23 @@ respond with a plain unordered list each item starting with the year the movie w
return recommendations;
};
/**
 * Asks the chat model for an Unsplash search term matching an article.
 *
 * @param content Article text; only the first 10,000 characters are sent.
 * @returns The search term, trimmed, stripped of surrounding quotes and
 *   lower-cased, or `undefined` when `openAI` is not available or the model
 *   returned no usable text.
 */
export async function createUnsplashSearchTerm(content: string) {
  if (!openAI) return;

  const chatCompletion = await openAI.chat.completions.create({
    model: model,
    messages: [
      {
        role: "system",
        content:
          "Please respond with a search term for unsplash for the following article",
      },
      { role: "user", content: content.slice(0, 10_000) },
    ],
  });

  // `choices` may be empty and `content` may be null; treat both as "no term".
  const raw = chatCompletion.choices[0]?.message?.content;
  if (!raw) return;

  // Models frequently wrap short answers in quotes or add a trailing newline;
  // neither belongs in a search query.
  const term = raw
    .trim()
    .replace(/^["'`]+|["'`]+$/g, "")
    .trim()
    .toLowerCase();

  return term || undefined;
}
export async function createTags(content: string) {
if (!openAI) return;
const chatCompletion = await openAI.chat.completions.create({

View File

@@ -9,7 +9,7 @@ export async function fetchHtmlWithPlaywright(
fetchUrl: string,
streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
streamResponse.enqueue("booting up playwright");
streamResponse.info("booting up playwright");
const config: Parameters<typeof firefox.launch>[0] = {};
if (env.PROXY_SERVER) {
@@ -24,7 +24,7 @@ export async function fetchHtmlWithPlaywright(
// Launch the Playwright browser
const browser = await firefox.launch(config);
streamResponse.enqueue("fetching html");
streamResponse.info("fetching html");
try {
// Open a new browser context and page
@@ -42,7 +42,7 @@ export async function fetchHtmlWithPlaywright(
return html;
} catch (error) {
streamResponse.enqueue("error fetching html");
streamResponse.error("error fetching html");
console.error(error);
return "";
} finally {

29
lib/unsplash.ts Normal file
View File

@@ -0,0 +1,29 @@
import { UNSPLASH_API_KEY } from "./env.ts";
const API_URL = "https://api.unsplash.com";

/**
 * Searches Unsplash for a single landscape photo matching `searchTerm`.
 *
 * @param searchTerm Free-text query sent as the `query` parameter.
 * @returns The `urls.regular` URL of the first result, or `undefined` when
 *   the response contains no (usable) result.
 * @throws Error when `UNSPLASH_API_KEY` is not set or the API answers with a
 *   non-2xx status.
 */
export async function getImageBySearchTerm(
  searchTerm: string,
): Promise<string | undefined> {
  if (!UNSPLASH_API_KEY) {
    throw new Error("UNSPLASH_API_KEY is not set");
  }

  const url = new URL("/search/photos", API_URL);
  url.searchParams.append("query", searchTerm);
  url.searchParams.append("per_page", "1");
  url.searchParams.append("orientation", "landscape");

  const response = await fetch(url.toString(), {
    headers: {
      Authorization: `Client-ID ${UNSPLASH_API_KEY}`,
    },
  });

  if (!response.ok) {
    // Release the unread body so the connection is not kept open.
    await response.body?.cancel();
    // `statusText` can be empty, so always include the numeric status.
    throw new Error(
      `Unsplash API request failed: ${response.status} ${response.statusText}`
        .trim(),
    );
  }

  const data = await response.json() as
    | { results?: Array<{ urls?: { regular?: string } }> }
    | null;

  // Guard every level: an unexpected payload should mean "no image",
  // not a TypeError.
  return data?.results?.[0]?.urls?.regular;
}

View File

@@ -1,6 +1,8 @@
import { JSDOM } from "jsdom";
import { fetchHtmlWithPlaywright } from "./playwright.ts";
import { createStreamResponse } from "./helpers.ts";
import { Defuddle } from "defuddle/node";
import TurndownService from "turndown";
/**
* Mutates the given JSDOM instance: rewrites all relevant URL-bearing attributes
@@ -164,6 +166,8 @@ function absolutizeMetaRefresh(content: string, base: string): string {
return `${delay}; url=${abs}`;
}
const turndownService = new TurndownService();
export async function webScrape(
url: string,
streamResponse: ReturnType<typeof createStreamResponse>,
@@ -172,5 +176,12 @@ export async function webScrape(
const html = await fetchHtmlWithPlaywright(url, streamResponse);
const dom = new JSDOM(html);
absolutizeDomUrls(dom, u.origin);
return dom;
const result = await Defuddle(dom, url);
return {
...result,
dom,
markdown: turndownService.turndown(result.content),
};
}

View File

@@ -3,6 +3,7 @@ import { Defuddle } from "defuddle/node";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { getYoutubeVideoDetails } from "@lib/youtube.ts";
import {
extractYoutubeId,
@@ -19,6 +20,35 @@ import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"
const log = createLogger("api/article");
/**
 * Best-effort lookup of a cover image on Unsplash for the given article text.
 *
 * Asks OpenAI for a search term, then queries Unsplash with it. Any failure
 * along the way is logged and reported as "no image" instead of being thrown.
 *
 * @returns The image URL, or `undefined` when no term or image was found or
 *   an error occurred.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  try {
    streamResponse.info("creating unsplash search term");
    const term = await openai.createUnsplashSearchTerm(content);

    if (term) {
      streamResponse.info(`searching for ${term}`);
      // `return await` keeps a rejected lookup inside this try/catch.
      return await unsplash.getImageBySearchTerm(term);
    }

    return undefined;
  } catch (failure) {
    log.error("Failed to get unsplash cover image", failure);
    return undefined;
  }
}
/**
 * Determines the file extension to use for an image URL.
 *
 * A non-empty `fm` query parameter wins (presumably the image-format
 * parameter of Unsplash-style URLs, whose path has no extension - confirm);
 * otherwise the extension is taken from the URL path. Strings that are not
 * absolute URLs are passed to `fileExtension` unchanged.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    // An empty `fm=` would produce a file name ending in a bare dot, so only
    // a non-empty value is accepted.
    const format = u.searchParams.get("fm");
    if (format) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
async function fetchAndStoreCover(
imageUrl: string | undefined,
title: string,
@@ -26,12 +56,12 @@ async function fetchAndStoreCover(
): Promise<string | undefined> {
if (!imageUrl) return;
const imagePath = `articles/images/${safeFileName(title)}_cover.${
fileExtension(imageUrl)
ext(imageUrl)
}`;
try {
streamResponse?.enqueue("downloading image");
streamResponse?.info("downloading image");
const res = await fetch(imageUrl);
streamResponse?.enqueue("saving image");
streamResponse?.info("saving image");
if (!res.ok) {
console.log(`Failed to download remote image: ${imageUrl}`, res.status);
return;
@@ -53,38 +83,43 @@ async function processCreateArticle(
) {
log.info("create article from url", { url: fetchUrl });
streamResponse.enqueue("downloading article");
streamResponse.info("downloading article");
const doc = await webScrape(fetchUrl, streamResponse);
const result = await webScrape(fetchUrl, streamResponse);
const result = await Defuddle(doc, fetchUrl, {
markdown: true,
});
log.debug("downloaded and parse parsed", result);
log.debug("downloaded and parse parsed", {
...result,
url: fetchUrl,
content: result.content.slice(0, 200),
});
streamResponse.info("parsed article, creating tags with openai");
streamResponse.enqueue("parsed article, creating tags with openai");
const aiMeta = await openai.extractArticleMetadata(result.markdown);
const aiMeta = await openai.extractArticleMetadata(result.content);
streamResponse.enqueue("postprocessing article");
streamResponse.info("postprocessing article");
const title = result?.title || aiMeta?.headline || "";
const coverImagePath = await fetchAndStoreCover(
let coverImagePath: string | undefined = undefined;
if (result?.image?.length) {
log.debug("using local image for cover image", { image: result.image });
coverImagePath = await fetchAndStoreCover(
result.image,
title,
streamResponse,
);
} else {
const urlPath = await getUnsplashCoverImage(
result.markdown,
streamResponse,
);
coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
log.debug("using unsplash for cover image", { image: coverImagePath });
}
const url = toUrlSafeString(title);
const newArticle: ArticleResource["content"] = {
_type: "Article",
headline: title,
articleBody: result.content,
articleBody: result.markdown,
url: fetchUrl,
datePublished: formatDate(
result?.published || aiMeta?.datePublished || undefined,
@@ -100,16 +135,16 @@ async function processCreateArticle(
},
} as const;
streamResponse.enqueue("writing to disk");
streamResponse.info("writing to disk");
log.debug("writing to disk", {
...newArticle,
articleBody: newArticle.articleBody?.slice(0, 200),
});
await createResource(`articles/${toUrlSafeString(title)}.md`, newArticle);
await createResource(`articles/${url}.md`, newArticle);
streamResponse.enqueue("id: " + title);
streamResponse.send({ type: "finished", url });
}
async function processCreateYoutubeVideo(
@@ -122,13 +157,13 @@ async function processCreateYoutubeVideo(
url: fetchUrl,
});
streamResponse.enqueue("getting video infos from youtube api");
streamResponse.info("getting video infos from youtube api");
const youtubeId = extractYoutubeId(fetchUrl);
const video = await getYoutubeVideoDetails(youtubeId);
streamResponse.enqueue("shortening title with openai");
streamResponse.info("shortening title with openai");
const videoTitle = await openai.shortenTitle(video.snippet.title) ||
video.snippet.title;
@@ -152,16 +187,18 @@ async function processCreateYoutubeVideo(
},
};
streamResponse.enqueue("creating article");
streamResponse.info("creating article");
const filename = toUrlSafeString(videoTitle);
await createResource(
`articles/${toUrlSafeString(videoTitle)}.md`,
`articles/${filename}.md`,
newArticle,
);
streamResponse.enqueue("finished");
streamResponse.info("finished");
streamResponse.enqueue("id: " + toUrlSafeString(videoTitle));
streamResponse.send({ type: "finished", url: filename });
}
export const handler: Handlers = {

View File

@@ -0,0 +1,191 @@
import { FreshContext, Handlers } from "$fresh/server.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
import { formatDate, safeFileName } from "@lib/string.ts";
import { createStreamResponse } from "@lib/helpers.ts";
import {
AccessDeniedError,
BadRequestError,
NotFoundError,
} from "@lib/errors.ts";
import { createResource, fetchResource } from "@lib/marka/index.ts";
import { ArticleResource } from "@lib/marka/schema.ts";
import { webScrape } from "@lib/webScraper.ts";
import * as openai from "@lib/openai.ts";
import * as unsplash from "@lib/unsplash.ts";
import { createLogger } from "@lib/log/index.ts";
/**
 * Determines the file extension to use for an image URL.
 *
 * A non-empty `fm` query parameter wins (presumably the image-format
 * parameter of Unsplash-style URLs, whose path has no extension - confirm);
 * otherwise the extension is taken from the URL path. Strings that are not
 * absolute URLs are passed to `fileExtension` unchanged.
 */
function ext(str: string) {
  try {
    const u = new URL(str);
    // An empty `fm=` would produce a file name ending in a bare dot, so only
    // a non-empty value is accepted.
    const format = u.searchParams.get("fm");
    if (format) {
      return format;
    }
    return fileExtension(u.pathname);
  } catch (_e) {
    return fileExtension(str);
  }
}
const log = createLogger("api/article/enhance");
/**
 * Tries to find a cover image on Unsplash for the given article text.
 *
 * A search term is generated via OpenAI and used to query Unsplash. Errors
 * are logged and swallowed so that a missing cover never fails the caller.
 *
 * @returns URL of the image, or `undefined` if none could be determined.
 */
async function getUnsplashCoverImage(
  content: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  let imageUrl: string | undefined = undefined;

  try {
    streamResponse.info("creating unsplash search term");
    const query = await openai.createUnsplashSearchTerm(content);

    if (query) {
      streamResponse.info(`searching for ${query}`);
      imageUrl = await unsplash.getImageBySearchTerm(query);
    }
  } catch (cause) {
    log.error("Failed to get unsplash cover image", cause);
    imageUrl = undefined;
  }

  return imageUrl;
}
/**
 * Downloads a remote image and stores it as the cover of an article.
 *
 * The file is written to `articles/images/<safe title>_cover.<ext>` via
 * `createResource`. Download and storage failures are logged and reported as
 * "no cover" rather than thrown.
 *
 * @param imageUrl Remote image URL; nothing happens when it is empty/undefined.
 * @param title Article title used to derive the file name.
 * @param streamResponse Stream used to report progress to the client.
 * @returns `resources/<path>` of the stored image, or `undefined` on failure.
 */
async function fetchAndStoreCover(
  imageUrl: string | undefined,
  title: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string | undefined> {
  if (!imageUrl) return undefined;

  const fileStem = safeFileName(title);
  const extension = ext(imageUrl);
  const target = `articles/images/${fileStem}_cover.${extension}`;

  try {
    streamResponse.info("downloading cover");
    const download = await fetch(imageUrl);

    if (download.ok) {
      const bytes = await download.arrayBuffer();
      streamResponse.info("saving cover");
      await createResource(target, bytes);
      return `resources/${target}`;
    }

    log.error(`Failed to download remote image: ${imageUrl}`, {
      status: download.status,
    });
    return undefined;
  } catch (failure) {
    log.error(`Failed to save image: ${imageUrl}`, failure);
    return undefined;
  }
}
/**
 * Re-scrapes the source URL of an existing article and updates the stored
 * resource.
 *
 * The article body is always replaced with the freshly scraped markdown.
 * Publication date, author and cover image are only filled in when they are
 * missing. Progress is reported through `streamResponse`, which finally
 * receives a "finished" message carrying the article name without a trailing
 * `.md`.
 *
 * @param name Resource name of the article below `articles/`.
 * @param streamResponse Stream used to report progress to the client.
 * @throws NotFoundError when the article does not exist.
 * @throws BadRequestError when the article has no source URL.
 */
async function processEnhanceArticle(
  name: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  const article = await fetchResource<ArticleResource>(
    `articles/${name}`,
  );
  if (!article) {
    throw new NotFoundError();
  }

  const fetchUrl = article.content?.url;
  if (!fetchUrl) {
    throw new BadRequestError("Article has no URL to enhance from.");
  }

  log.info("enhancing article from url", { url: fetchUrl });

  streamResponse.info("scraping url");
  const result = await webScrape(fetchUrl, streamResponse);

  streamResponse.info("parsing content");
  // Log a preview only: the scrape result holds the whole page content and
  // the DOM instance.
  log.debug("downloaded and parsed", {
    url: fetchUrl,
    title: result?.title,
    content: result.content?.slice(0, 200),
  });

  streamResponse.info("extracting metadata with openai");
  const aiMeta = await openai.extractArticleMetadata(result.markdown);

  const title = result?.title || aiMeta?.headline ||
    article.content?.headline || "";

  article.content ??= {
    _type: "Article",
    headline: title,
    url: fetchUrl,
  };

  article.content.articleBody = result.markdown;
  article.content.datePublished ??= formatDate(
    result?.published || aiMeta?.datePublished || undefined,
  );

  if (!article.content.author?.name) {
    article.content.author = {
      _type: "Person",
      name: (result.schemaOrgData?.author?.name || aiMeta?.author || "")
        .replace(
          "@",
          "twitter:",
        ),
    };
  }

  if (!article.content.image) {
    let coverImagePath: string | undefined = undefined;
    if (result?.image?.length) {
      log.debug("using local image for cover image", { image: result.image });
      coverImagePath = await fetchAndStoreCover(
        result.image,
        title,
        streamResponse,
      );
    } else {
      // Use the markdown rather than the raw HTML so the search-term prompt
      // is not filled with markup.
      const urlPath = await getUnsplashCoverImage(
        result.markdown,
        streamResponse,
      );
      coverImagePath = await fetchAndStoreCover(urlPath, title, streamResponse);
      log.debug("using unsplash for cover image", { image: coverImagePath });
    }
    if (coverImagePath) {
      article.content.image = coverImagePath;
    }
  }

  log.debug("writing to disk", {
    name: name,
    article: {
      ...article,
      content: {
        ...article.content,
        articleBody: article.content.articleBody?.slice(0, 200),
      },
    },
  });

  streamResponse.info("writing to disk");
  await createResource(`articles/${name}`, article.content);

  // Strip a trailing ".md"; the previous pattern `/$\.md/` anchored the end
  // of the string before the extension and therefore never matched.
  streamResponse.send({ type: "finished", url: name.replace(/\.md$/, "") });
}
/**
 * POST handler: starts enhancing the article named by the route parameter and
 * immediately returns the streaming response that reports its progress.
 *
 * @throws AccessDeniedError when the request has no session.
 */
const POST = (
  _req: Request,
  ctx: FreshContext,
): Response => {
  if (!ctx.state.session) {
    throw new AccessDeniedError();
  }

  const stream = createStreamResponse();

  // Runs in the background; the response is returned before the work is
  // done. Failures are forwarded to the client, and the stream is closed in
  // either case.
  const job = processEnhanceArticle(ctx.params.name, stream);
  job
    .catch((err) => {
      log.error(err);
      stream.error(err.message);
    })
    .finally(() => {
      stream.cancel();
    });

  return stream.response;
};
// Route handlers exported for this route; only POST is defined.
export const handler: Handlers = {
POST,
};

View File

@@ -2,7 +2,7 @@ import { Handlers } from "$fresh/server.ts";
import { json } from "@lib/helpers.ts";
export const handler: Handlers = {
async GET() {
GET() {
return json([]);
},
};

View File

@@ -10,7 +10,6 @@ import { parseJsonLdToRecipeSchema } from "./parseJsonLd.ts";
import z from "zod";
import { createResource } from "@lib/marka/index.ts";
import { webScrape } from "@lib/webScraper.ts";
import { Defuddle } from "defuddle/node";
import { RecipeResource } from "@lib/marka/schema.ts";
const log = createLogger("api/article");
@@ -23,18 +22,14 @@ async function processCreateRecipeFromUrl(
) {
log.info("create article from url", { url: fetchUrl });
streamResponse.enqueue("downloading article");
streamResponse.info("downloading article");
const doc = await webScrape(fetchUrl, streamResponse);
const result = await webScrape(fetchUrl, streamResponse);
const result = await Defuddle(doc, fetchUrl, {
markdown: true,
});
streamResponse.enqueue("download success");
streamResponse.info("download success");
const jsonLds = Array.from(
doc?.querySelectorAll(
result.dom?.querySelectorAll(
"script[type='application/ld+json']",
),
) as unknown as HTMLScriptElement[];
@@ -48,11 +43,11 @@ async function processCreateRecipeFromUrl(
}
if (!recipe) {
const res = await openai.extractRecipe(result.content);
const res = await openai.extractRecipe(result.markdown);
if (!res || "errorMessages" in res) {
const errorMessage = res?.errorMessages?.[0] ||
"could not extract recipe";
streamResponse.enqueue(`failed to extract recipe: ${errorMessage}`);
streamResponse.error(`failed to extract recipe: ${errorMessage}`);
return;
}
recipe = res;
@@ -61,7 +56,7 @@ async function processCreateRecipeFromUrl(
const id = toUrlSafeString(recipe?.name || "");
if (!recipe) {
streamResponse.enqueue("failed to parse recipe");
streamResponse.error("failed to parse recipe");
streamResponse.cancel();
return;
}
@@ -80,11 +75,11 @@ async function processCreateRecipeFromUrl(
const finalPath = `resources/recipes/images/${
safeFileName(id)
}_cover.${extension}`;
streamResponse.enqueue("downloading image");
streamResponse.info("downloading image");
try {
streamResponse.enqueue("downloading image");
streamResponse.info("downloading image");
const res = await fetch(newRecipe.image);
streamResponse.enqueue("saving image");
streamResponse.info("saving image");
const buffer = await res.arrayBuffer();
await createResource(finalPath, buffer);
newRecipe.image = finalPath;
@@ -93,11 +88,11 @@ async function processCreateRecipeFromUrl(
}
}
streamResponse.enqueue("finished processing, creating file");
streamResponse.info("finished processing, creating file");
await createResource(`recipes/${id}.md`, newRecipe);
streamResponse.enqueue("id: " + id);
streamResponse.send({ type: "finished", url: id });
}
export const handler: Handlers = {
@@ -119,7 +114,7 @@ export const handler: Handlers = {
processCreateRecipeFromUrl({ fetchUrl, streamResponse }).then((article) => {
log.debug("created article from link", { article });
}).catch((err) => {
streamResponse.enqueue(`error creating recipe: ${err}`);
streamResponse.error(`creating recipe: ${err}`);
log.error(err);
}).finally(() => {
streamResponse.cancel();

View File

@@ -21,7 +21,7 @@ async function processUpdateRecommendations(
return true;
}) as ReviewResource[];
streamResponse.enqueue("Fetched all movies");
streamResponse.info("fetched all movies");
let done = 0;
const total = movies.length;
@@ -41,7 +41,7 @@ async function processUpdateRecommendations(
console.log(err);
}
done++;
streamResponse.enqueue(
streamResponse.info(
`${Math.floor((done / total) * 100)}% [${
done + 1
}/${total}] ${movie.name}`,
@@ -50,7 +50,7 @@ async function processUpdateRecommendations(
console.log(err);
});
streamResponse.enqueue("100% Finished");
streamResponse.info("100% Finished");
}
export const handler: Handlers = {

View File

@@ -50,7 +50,7 @@ export default function Greet(
context={article}
>
<RedirectSearchHandler />
<KMenu type="main" context={{ type: "articles" }} />
<KMenu type="main" context={article} />
<MetaTags resource={article} />
<PageHero