feat: fallback to unsplash cover when article contains no image
This commit is contained in:
@@ -7,6 +7,7 @@ export const PROXY_PASSWORD = Deno.env.get("PROXY_PASSWORD");
|
||||
// API credentials below are read from the process environment via Deno.env.get.
export const TMDB_API_KEY = Deno.env.get("TMDB_API_KEY");
|
||||
export const OPENAI_API_KEY = Deno.env.get("OPENAI_API_KEY");
|
||||
export const YOUTUBE_API_KEY = Deno.env.get("YOUTUBE_API_KEY");
|
||||
// Unsplash access key (sent as "Client-ID" by lib/unsplash.ts). There is no
// non-null assertion here, so consumers must handle the key being unset.
export const UNSPLASH_API_KEY = Deno.env.get("UNSPLASH_API_KEY");
|
||||
// The trailing "!" is a compile-time non-null assertion only; nothing on this
// line checks at runtime that the variable is actually set.
export const TELEGRAM_API_KEY = Deno.env.get("TELEGRAM_API_KEY")!;
|
||||
|
||||
export const GITEA_SERVER = Deno.env.get("GITEA_SERVER");
|
||||
|
||||
102
lib/helpers.ts
102
lib/helpers.ts
@@ -31,19 +31,54 @@ export const fixRenderedMarkdown = (content: string) => {
|
||||
});
|
||||
};
|
||||
|
||||
export async function fetchStream(url: string, cb: (chunk: string) => void) {
|
||||
const response = await fetch(url);
|
||||
const reader = response?.body?.getReader();
|
||||
if (reader) {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) return;
|
||||
const data = new TextDecoder().decode(value);
|
||||
data
|
||||
.split("$")
|
||||
.filter((d) => d && d.length)
|
||||
.map((d) => cb(Array.isArray(d) ? d[0] : d));
|
||||
}
|
||||
/**
 * One message of the newline-delimited JSON (NDJSON) protocol consumed by
 * `fetchStream`. `info`, `error` and `warning` carry a human-readable
 * `message`; `finished` carries a `url`.
 */
type StreamMessage =
  | { type: "info"; message: string }
  | { type: "error"; message: string }
  | { type: "warning"; message: string }
  | { type: "finished"; url: string };

/**
 * Fetches `url` and calls `cb` once for every NDJSON line of the response
 * body, in order, as the lines arrive.
 *
 * Lines are split on "\n" and trimmed (so "\r\n" works too); blank lines are
 * skipped and a final line without a trailing newline is still delivered.
 * The parsed JSON is passed to `cb` without validating its shape.
 *
 * @param url  Address to fetch.
 * @param cb   Invoked synchronously with each parsed message.
 * @param init Optional `fetch` options (method, headers, body, signal, ...).
 * @returns Resolves once the body is fully consumed, or immediately when the
 *          response has no body.
 * @throws Whatever `fetch`, `JSON.parse` (on a malformed line) or `cb` throws.
 *         In that case the response stream is cancelled before rethrowing so
 *         the underlying connection is not left open.
 */
export async function fetchStream(
  url: string,
  cb: (chunk: StreamMessage) => void,
  init?: RequestInit,
): Promise<void> {
  const res = await fetch(url, init);
  if (!res.body) return;

  // Text received so far that has not yet been terminated by a newline.
  let pending = "";

  const reader = res.body
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(
      new TransformStream<string, string>({
        transform(chunk, controller) {
          const parts = (pending + chunk).split("\n");
          // The last element is the (possibly empty) unterminated remainder.
          pending = parts.pop() ?? "";
          for (const part of parts) {
            const line = part.trim();
            if (line) controller.enqueue(line);
          }
        },
        flush(controller) {
          // Deliver a trailing line that was not followed by "\n".
          const line = pending.trim();
          if (line) controller.enqueue(line);
        },
      }),
    )
    .getReader();

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // The payload is trusted to follow the StreamMessage protocol.
      cb(JSON.parse(value) as StreamMessage);
    }
  } catch (err) {
    // Stop the download instead of leaking a half-read response body; a
    // failure while cancelling must not mask the original error.
    await reader.cancel(err).catch(() => {});
    throw err;
  }
}
|
||||
|
||||
@@ -58,32 +93,53 @@ export function hashString(message: string) {
|
||||
}
|
||||
|
||||
/**
 * Creates a streaming HTTP `Response` whose body is newline-delimited JSON
 * (one `JSON.stringify`-ed value per line), plus helpers to write to it.
 *
 * Returned members:
 * - `response`: the `Response` to hand back to the client.
 * - `send(obj)`: writes `obj` as one NDJSON line.
 * - `info` / `error` / `warning(message)`: shorthands that send
 *   `{ type, message }`.
 * - `cancel()`: ends the stream.
 *
 * Once the stream has ended, either through `cancel()` or because the
 * consumer cancelled the body, further writes and further `cancel()` calls
 * are silently ignored instead of throwing on the closed controller.
 */
export const createStreamResponse = () => {
  const encoder = new TextEncoder();
  let controller: ReadableStreamDefaultController<Uint8Array> | undefined;
  // True once the stream can no longer accept data.
  let closed = false;

  const body = new ReadableStream<Uint8Array>({
    start(c) {
      controller = c;
    },
    cancel() {
      // The consumer went away; enqueueing from now on would throw.
      closed = true;
    },
  });

  const response = new Response(body, {
    headers: {
      // newline-delimited JSON
      "content-type": "application/x-ndjson; charset=utf-8",
      // prevent intermediaries from buffering/transforming
      "cache-control": "no-cache, no-transform",
      "x-content-type-options": "nosniff",
      // nginx hint to disable proxy buffering
      "x-accel-buffering": "no",
      // if you control compression, keep it off for streams
      // "content-encoding": "identity",
    },
  });

  const send = (obj: unknown) => {
    if (closed || !controller) return;
    // The trailing "\n" is the message delimiter the reader splits on.
    controller.enqueue(encoder.encode(JSON.stringify(obj) + "\n"));
  };

  const cancel = () => {
    if (closed) return;
    closed = true;
    controller?.close();
  };

  function info(message: string) {
    return send({ type: "info", message });
  }

  function error(message: string) {
    return send({ type: "error", message });
  }

  function warning(message: string) {
    return send({ type: "warning", message });
  }

  return {
    response,
    cancel,
    send,
    info,
    error,
    warning,
  };
};
|
||||
|
||||
|
||||
@@ -38,13 +38,13 @@ export function createLogger(scope: string, _options?: LoggerOptions): Logger {
|
||||
/**
 * Joins logger arguments into a single space-separated string.
 * Every argument is converted with `String`, so `null`/`undefined` stay
 * visible, and a call without arguments yields an empty string.
 */
function formatLogArgs(data: unknown[]): string {
  return data.map((item) => String(item)).join(" ");
}

/**
 * Builds a logger that forwards everything to a stream response.
 *
 * `debug` and `info` are sent as info messages, `error` as an error message
 * prefixed with "[ERROR]: ", and `warn` as a warning message prefixed with
 * "[WARN]: ".
 *
 * @param stream Stream response to write the log messages to.
 */
export function loggerFromStream(stream: StreamResponse) {
  return {
    debug: (...data: unknown[]) => stream.info(formatLogArgs(data)),
    info: (...data: unknown[]) => stream.info(formatLogArgs(data)),
    error: (...data: unknown[]) =>
      stream.error(`[ERROR]: ${formatLogArgs(data)}`),
    warn: (...data: unknown[]) =>
      stream.warning(`[WARN]: ${formatLogArgs(data)}`),
  };
}
|
||||
|
||||
|
||||
@@ -106,8 +106,11 @@ export async function createResource(
|
||||
body: isJson ? JSON.stringify(content) : content,
|
||||
});
|
||||
if (!response.ok) {
|
||||
const text = await response.text();
|
||||
throw new Error(
|
||||
`Failed to create resource (resources/${path}) : ${response.status}`,
|
||||
`failed to create resource (resources/${path}): ${
|
||||
text || response.status
|
||||
}`,
|
||||
);
|
||||
}
|
||||
return response.json();
|
||||
|
||||
@@ -195,6 +195,23 @@ respond with a plain unordered list each item starting with the year the movie w
|
||||
return recommendations;
|
||||
};
|
||||
|
||||
/**
 * Asks the chat model for an Unsplash search term that fits an article.
 *
 * Only the first 10,000 characters of `content` are sent. The reply is
 * trimmed, stripped of wrapping quotes/backticks and a trailing period, and
 * lower-cased so it can be used directly as a search query.
 *
 * @param content Article text to derive the search term from.
 * @returns The search term, or `undefined` when no OpenAI client is
 *          configured or the model returned nothing usable.
 */
export async function createUnsplashSearchTerm(content: string) {
  if (!openAI) return;

  const chatCompletion = await openAI.chat.completions.create({
    model: model,
    messages: [
      {
        role: "system",
        content:
          "Please respond with a search term for unsplash for the following article",
      },
      { role: "user", content: content.slice(0, 10_000) },
    ],
  });

  // The API may return no choices or a null message content.
  const reply = chatCompletion.choices[0]?.message?.content;
  if (!reply) return;

  const term = reply
    .trim()
    // Models frequently wrap the answer in quotes or end it with a period.
    .replace(/^["'`\u2018\u2019\u201C\u201D]+|["'`\u2018\u2019\u201C\u201D.]+$/g, "")
    .trim()
    .toLowerCase();

  // An empty string is not a usable query; report it as "no term".
  return term || undefined;
}
|
||||
|
||||
export async function createTags(content: string) {
|
||||
if (!openAI) return;
|
||||
const chatCompletion = await openAI.chat.completions.create({
|
||||
|
||||
@@ -9,7 +9,7 @@ export async function fetchHtmlWithPlaywright(
|
||||
fetchUrl: string,
|
||||
streamResponse: ReturnType<typeof createStreamResponse>,
|
||||
): Promise<string> {
|
||||
streamResponse.enqueue("booting up playwright");
|
||||
streamResponse.info("booting up playwright");
|
||||
|
||||
const config: Parameters<typeof firefox.launch>[0] = {};
|
||||
if (env.PROXY_SERVER) {
|
||||
@@ -24,7 +24,7 @@ export async function fetchHtmlWithPlaywright(
|
||||
// Launch the Playwright browser
|
||||
const browser = await firefox.launch(config);
|
||||
|
||||
streamResponse.enqueue("fetching html");
|
||||
streamResponse.info("fetching html");
|
||||
|
||||
try {
|
||||
// Open a new browser context and page
|
||||
@@ -42,7 +42,7 @@ export async function fetchHtmlWithPlaywright(
|
||||
|
||||
return html;
|
||||
} catch (error) {
|
||||
streamResponse.enqueue("error fetching html");
|
||||
streamResponse.error("error fetching html");
|
||||
console.error(error);
|
||||
return "";
|
||||
} finally {
|
||||
|
||||
29
lib/unsplash.ts
Normal file
29
lib/unsplash.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { UNSPLASH_API_KEY } from "./env.ts";
|
||||
|
||||
const API_URL = "https://api.unsplash.com";
|
||||
|
||||
/**
 * Looks up a single landscape photo on Unsplash and returns its URL.
 *
 * @param searchTerm Query passed to the Unsplash photo search.
 * @returns The `urls.regular` value of the first result, or `undefined` when
 *          the search has no results or the payload lacks that field.
 * @throws Error when `UNSPLASH_API_KEY` is not set, or when the API answers
 *         with a non-2xx status (the message includes the status code).
 */
export async function getImageBySearchTerm(
  searchTerm: string,
): Promise<string | undefined> {
  if (!UNSPLASH_API_KEY) {
    throw new Error("UNSPLASH_API_KEY is not set");
  }

  const url = new URL("/search/photos", API_URL);
  url.searchParams.append("query", searchTerm);
  url.searchParams.append("per_page", "1");
  url.searchParams.append("orientation", "landscape");

  const response = await fetch(url.toString(), {
    headers: {
      Authorization: `Client-ID ${UNSPLASH_API_KEY}`,
    },
  });

  if (!response.ok) {
    // Discard the unread body so the connection is released.
    await response.body?.cancel().catch(() => {});
    // statusText is frequently empty, so always include the numeric status.
    const reason = `${response.status} ${response.statusText}`.trim();
    throw new Error(`Unsplash API request failed: ${reason}`);
  }

  // External payload: every level is treated as optional.
  const data = (await response.json()) as
    | { results?: Array<{ urls?: { regular?: unknown } }> }
    | null;
  const regular = data?.results?.[0]?.urls?.regular;
  return typeof regular === "string" ? regular : undefined;
}
|
||||
@@ -1,6 +1,8 @@
|
||||
import { JSDOM } from "jsdom";
|
||||
import { fetchHtmlWithPlaywright } from "./playwright.ts";
|
||||
import { createStreamResponse } from "./helpers.ts";
|
||||
import { Defuddle } from "defuddle/node";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
/**
|
||||
* Mutates the given JSDOM instance: rewrites all relevant URL-bearing attributes
|
||||
@@ -164,6 +166,8 @@ function absolutizeMetaRefresh(content: string, base: string): string {
|
||||
return `${delay}; url=${abs}`;
|
||||
}
|
||||
|
||||
const turndownService = new TurndownService();
|
||||
|
||||
export async function webScrape(
|
||||
url: string,
|
||||
streamResponse: ReturnType<typeof createStreamResponse>,
|
||||
@@ -172,5 +176,12 @@ export async function webScrape(
|
||||
const html = await fetchHtmlWithPlaywright(url, streamResponse);
|
||||
const dom = new JSDOM(html);
|
||||
absolutizeDomUrls(dom, u.origin);
|
||||
return dom;
|
||||
|
||||
const result = await Defuddle(dom, url);
|
||||
|
||||
return {
|
||||
...result,
|
||||
dom,
|
||||
markdown: turndownService.turndown(result.content),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user