feat: url scraper to recipe

This commit is contained in:
max_richter 2025-01-18 00:46:05 +01:00
parent 6112d007c2
commit d4cccacc28
24 changed files with 1349 additions and 137 deletions

View File

@ -7,6 +7,7 @@ WORKDIR /app
COPY . .
RUN apk add curl libstdc++ &&\
deno run -A npm:playwright install firefox &&\
deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts &&\
sed -i -e 's/"deno"/"no-deno"/' node_modules/@libsql/client/package.json &&\
mkdir -p $DATA_DIR

494
article.html Normal file

File diff suppressed because one or more lines are too long

View File

@ -95,7 +95,7 @@ export function Card(
export function ResourceCard(
{ res, sublink = "movies" }: { sublink?: string; res: GenericResource },
) {
const { meta: { image } = {} } = res;
const { meta: { image } = {} } = res || {};
const imageUrl = image
? `/api/images?image=${image}&width=200&height=200`

View File

@ -1,22 +1,22 @@
import { ComponentChildren } from "preact";
import Search from "@islands/Search.tsx";
import { GenericResource, SearchResult } from "@lib/types.ts";
import { GenericResource } from "@lib/types.ts";
export type Props = {
children: ComponentChildren;
title?: string;
name?: string;
url: URL;
url: URL | string;
description?: string;
context?: { type: string };
searchResults?: GenericResource[];
};
export const MainLayout = (
{ children, url, title, context, searchResults }: Props,
{ children, url, context, searchResults }: Props,
) => {
const _url = typeof url === "string" ? new URL(url) : url;
const hasSearch = _url.search.includes("q=");
const hasSearch = _url?.search?.includes("q=");
if (hasSearch) {
return (

View File

@ -34,25 +34,24 @@
"drizzle-kit": "npm:drizzle-kit@^0.30.1",
"drizzle-orm": "npm:drizzle-orm@^0.38.3",
"fuzzysort": "npm:fuzzysort@^3.1.0",
"playwright": "npm:playwright@^1.49.1",
"playwright-extra": "npm:playwright-extra@^4.3.6",
"preact": "https://esm.sh/preact@10.22.0",
"preact-render-to-string": "https://esm.sh/*preact-render-to-string@6.2.2",
"preact/": "https://esm.sh/preact@10.22.0/",
"gfm": "jsr:@deno/gfm",
"puppeteer-extra-plugin-stealth": "npm:puppeteer-extra-plugin-stealth@^2.11.2",
"tailwindcss": "npm:tailwindcss@^3.4.17",
"tailwindcss/": "npm:/tailwindcss@^3.4.17/",
"tailwindcss/plugin": "npm:/tailwindcss@^3.4.17/plugin.js",
"camelcase-css": "npm:camelcase-css",
"tsx": "npm:tsx@^4.19.2",
"yaml": "https://deno.land/std@0.197.0/yaml/mod.ts",
"zod": "https://deno.land/x/zod@v3.21.4/mod.ts",
"zod": "npm:zod@^3.24.1",
"domparser": "https://deno.land/x/deno_dom@v0.1.48/deno-dom-wasm.ts",
"fs": "https://deno.land/std/fs/mod.ts",
"imagemagick": "https://deno.land/x/imagemagick_deno@0.0.31/mod.ts"
},
"scopes": {
"https://deno.land/x/emoji/": {
"https://deno.land/x/my-library@1.0.0/mod.ts": "./patched/mod.ts"
}
},
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "preact"

View File

@ -14,6 +14,7 @@ import * as $api_articles_index from "./routes/api/articles/index.ts";
import * as $api_auth_callback from "./routes/api/auth/callback.ts";
import * as $api_auth_login from "./routes/api/auth/login.ts";
import * as $api_auth_logout from "./routes/api/auth/logout.ts";
import * as $api_cache from "./routes/api/cache.ts";
import * as $api_images_index from "./routes/api/images/index.ts";
import * as $api_index from "./routes/api/index.ts";
import * as $api_logs from "./routes/api/logs.ts";
@ -22,6 +23,8 @@ import * as $api_movies_enhance_name_ from "./routes/api/movies/enhance/[name].t
import * as $api_movies_index from "./routes/api/movies/index.ts";
import * as $api_query_index from "./routes/api/query/index.ts";
import * as $api_recipes_name_ from "./routes/api/recipes/[name].ts";
import * as $api_recipes_create_index from "./routes/api/recipes/create/index.ts";
import * as $api_recipes_create_parseJsonLd from "./routes/api/recipes/create/parseJsonLd.ts";
import * as $api_recipes_index from "./routes/api/recipes/index.ts";
import * as $api_recommendation_all from "./routes/api/recommendation/all.ts";
import * as $api_recommendation_data from "./routes/api/recommendation/data.ts";
@ -50,6 +53,7 @@ import * as $KMenu_commands_add_movie_infos from "./islands/KMenu/commands/add_m
import * as $KMenu_commands_add_series_infos from "./islands/KMenu/commands/add_series_infos.ts";
import * as $KMenu_commands_create_article from "./islands/KMenu/commands/create_article.ts";
import * as $KMenu_commands_create_movie from "./islands/KMenu/commands/create_movie.ts";
import * as $KMenu_commands_create_recipe from "./islands/KMenu/commands/create_recipe.ts";
import * as $KMenu_commands_create_recommendations from "./islands/KMenu/commands/create_recommendations.ts";
import * as $KMenu_commands_create_series from "./islands/KMenu/commands/create_series.ts";
import * as $KMenu_types from "./islands/KMenu/types.ts";
@ -71,6 +75,7 @@ const manifest = {
"./routes/api/auth/callback.ts": $api_auth_callback,
"./routes/api/auth/login.ts": $api_auth_login,
"./routes/api/auth/logout.ts": $api_auth_logout,
"./routes/api/cache.ts": $api_cache,
"./routes/api/images/index.ts": $api_images_index,
"./routes/api/index.ts": $api_index,
"./routes/api/logs.ts": $api_logs,
@ -79,6 +84,9 @@ const manifest = {
"./routes/api/movies/index.ts": $api_movies_index,
"./routes/api/query/index.ts": $api_query_index,
"./routes/api/recipes/[name].ts": $api_recipes_name_,
"./routes/api/recipes/create/index.ts": $api_recipes_create_index,
"./routes/api/recipes/create/parseJsonLd.ts":
$api_recipes_create_parseJsonLd,
"./routes/api/recipes/index.ts": $api_recipes_index,
"./routes/api/recommendation/all.ts": $api_recommendation_all,
"./routes/api/recommendation/data.ts": $api_recommendation_data,
@ -112,6 +120,7 @@ const manifest = {
"./islands/KMenu/commands/create_article.ts":
$KMenu_commands_create_article,
"./islands/KMenu/commands/create_movie.ts": $KMenu_commands_create_movie,
"./islands/KMenu/commands/create_recipe.ts": $KMenu_commands_create_recipe,
"./islands/KMenu/commands/create_recommendations.ts":
$KMenu_commands_create_recommendations,
"./islands/KMenu/commands/create_series.ts": $KMenu_commands_create_series,

View File

@ -1,15 +1,15 @@
import { Signal } from "@preact/signals";
import type {
Ingredient,
IngredientGroup,
Ingredients,
} from "../lib/recipes.ts";
import type { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
import { FunctionalComponent } from "preact";
/**
 * Formats an amount for display, rounded down to the nearest quarter
 * (e.g. 1.3 becomes "1.25").
 */
function numberToString(num: number) {
  const quarters = Math.floor(num * 4);
  return String(quarters / 4);
}
/**
 * Parses an ingredient quantity string into a number.
 *
 * Understands simple fractions ("1/2") and mixed numbers ("1 1/2") in
 * addition to everything `parseFloat` accepts. Returns `NaN` when the
 * string does not start with a number (e.g. an empty quantity).
 */
function stringToNumber(str: string) {
  const text = str.trim();
  // parseFloat("1/2") would stop at the slash and yield 1, so fractions are
  // handled explicitly before falling back to parseFloat.
  const fraction = text.match(/^(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+)/);
  if (fraction) {
    const whole = fraction[1] ? Number(fraction[1]) : 0;
    const numerator = Number(fraction[2]);
    const denominator = Number(fraction[3]);
    if (denominator !== 0) {
      return whole + numerator / denominator;
    }
  }
  return parseFloat(text);
}
const Ingredient = (
{ ingredient, amount, key = "", portion = 1 }: {
ingredient: Ingredient;
@ -18,10 +18,12 @@ const Ingredient = (
portion?: number;
},
) => {
const { type, amount: _amount, unit } = ingredient;
const { name, quantity, unit } = ingredient;
const finalAmount = (typeof _amount === "number" && amount)
? (_amount / portion) * (amount?.value || 1)
const parsedQuantity = stringToNumber(quantity);
const finalAmount = (typeof parsedQuantity === "number" && amount)
? (parsedQuantity / portion) * (amount?.value || 1)
: "";
return (
@ -30,13 +32,17 @@ const Ingredient = (
{numberToString(finalAmount || 0) +
(typeof unit === "string" ? unit : "")}
</td>
<td class="px-4 py-2">{type}</td>
<td class="px-4 py-2">{name}</td>
</tr>
);
};
export const IngredientsList: FunctionalComponent<
{ ingredients: Ingredients; amount: Signal<number>; portion?: number }
{
ingredients: (Ingredient | IngredientGroup)[];
amount: Signal<number>;
portion?: number;
}
> = (
{ ingredients, amount, portion },
) => {
@ -44,10 +50,9 @@ export const IngredientsList: FunctionalComponent<
<table class="w-full border-collapse table-auto">
<tbody>
{ingredients.map((item, index) => {
if ("name" in item) {
if ("items" in item) {
// Render IngredientGroup
const { name, ingredients: groupIngredients } =
item as IngredientGroup;
const { name, items: groupIngredients } = item as IngredientGroup;
return (
<>

View File

@ -6,6 +6,7 @@ import { getCookie } from "@lib/string.ts";
import { addSeriesInfo } from "@islands/KMenu/commands/add_series_infos.ts";
import { createNewSeries } from "@islands/KMenu/commands/create_series.ts";
import { updateAllRecommendations } from "@islands/KMenu/commands/create_recommendations.ts";
import { createNewRecipe } from "@islands/KMenu/commands/create_recipe.ts";
export const menus: Record<string, Menu> = {
main: {
@ -74,6 +75,7 @@ export const menus: Record<string, Menu> = {
createNewArticle,
createNewMovie,
createNewSeries,
createNewRecipe,
addMovieInfos,
updateAllRecommendations,
],

View File

@ -0,0 +1,46 @@
import { MenuEntry } from "@islands/KMenu/types.ts";
import { fetchStream, isValidUrl } from "@lib/helpers.ts";
import { getCookie } from "@lib/string.ts";
/**
 * KMenu command that asks for a link and creates a recipe from it.
 *
 * Once the command input holds a valid URL, the recipe-creation endpoint is
 * called and its streamed progress chunks are shown as loading text. A chunk
 * starting with `id:` signals completion and triggers a redirect to the new
 * recipe page.
 */
export const createNewRecipe: MenuEntry = {
  title: "Create new recipe",
  meta: "",
  icon: "IconSquareRoundedPlus",
  cb: (state) => {
    state.menus["input_link"] = {
      title: "Link:",
      entries: [],
    };

    state.activeMenu.value = "input_link";
    state.activeState.value = "input";

    // `started` guards against handling more than one URL. `unsub` is declared
    // before subscribing so the callback can reference it safely even if it is
    // invoked synchronously during `subscribe`.
    let started = false;
    let unsub: (() => void) | undefined = undefined;
    unsub = state.commandInput.subscribe((value) => {
      if (started || !isValidUrl(value)) return;
      started = true;
      unsub?.();

      state.activeState.value = "loading";

      // The link is user input and may contain `&`, `#` or `?`, so it must be
      // encoded to survive as a single query parameter.
      fetchStream(
        "/api/recipes/create?url=" + encodeURIComponent(value),
        (chunk) => {
          if (chunk.startsWith("id:")) {
            state.loadingText.value = "Finished";
            setTimeout(() => {
              globalThis.location.href = "/recipes/" +
                chunk.replace("id:", "").trim();
            }, 500);
          } else {
            state.loadingText.value = chunk;
          }
        },
      );
    });
    // If the callback already ran during `subscribe`, `unsub` was not yet
    // assigned at that point; unsubscribe now.
    if (started) unsub?.();
  },
  // Only offered to logged-in users on the start page or on recipe pages.
  visible: () => {
    if (!getCookie("session_cookie")) return false;
    const pathname = globalThis?.location?.pathname;
    return pathname === "/" || !!pathname?.includes("recipes");
  },
};

View File

@ -85,6 +85,9 @@ export function createCrud<T extends GenericResource>(
}
const content = await getDocument(path);
if (!content) {
return;
}
const parsed = parse(content, id);
@ -92,12 +95,13 @@ export function createCrud<T extends GenericResource>(
return addThumbnailToResource(parsed);
}
const doc = { ...parsed, content };
cache.set(path, doc);
cache.set(path, doc, { expires: 10 * 1000 });
return doc;
}
function create(id: string, content: string | ArrayBuffer | T) {
const path = pathFromId(id);
cache.set("all", undefined);
if (
typeof content === "string" || content instanceof ArrayBuffer
) {
@ -105,7 +109,9 @@ export function createCrud<T extends GenericResource>(
}
if (render) {
return createDocument(path, render(content));
const rendered = render(content);
cache.set(path, content);
return createDocument(path, rendered);
}
throw new Error("No renderer defined for " + prefix + " CRUD");
@ -114,7 +120,11 @@ export function createCrud<T extends GenericResource>(
async function update(id: string, updater: (r: Root) => Root) {
const path = pathFromId(id);
const content = await getDocument(path);
if (!content) {
return;
}
const newDoc = transformDocument(content, updater);
cache.set("all", undefined);
await createDocument(path, newDoc);
}
@ -132,7 +142,8 @@ export function createCrud<T extends GenericResource>(
const id = doc.name.replace(prefix, "").replace(/\.md$/, "");
return read(id);
}),
)).sort(sortFunction<T>(sort));
)).sort(sortFunction<T>(sort)).filter((v) => !!v);
cache.set("all", parsed);
return parsed;
}

View File

@ -58,6 +58,10 @@ export function createDocument(
log.info("creating document", { name });
if (typeof content === "string") {
updateDocument(name, content).catch(log.error);
}
return fetch(SILVERBULLET_SERVER + "/" + name, {
body: content,
method: "PUT",
@ -65,25 +69,49 @@ export function createDocument(
});
}
export async function getDocument(name: string): Promise<string> {
const documents = await db.select().from(documentTable).where(
eq(documentTable.name, name),
).limit(1);
if (documents[0]?.content) return documents[0].content;
async function fetchDocument(name: string) {
log.debug("fetching document", { name });
const headers = new Headers();
headers.append("X-Sync-Mode", "true");
const response = await fetch(SILVERBULLET_SERVER + "/" + name, { headers });
const text = await response.text();
if (response.status === 404) {
return;
}
return response.text();
}
await db.update(documentTable).set({
content: text,
}).where(eq(documentTable.name, name));
/**
 * Returns the content of a document, preferring the copy stored in the
 * local `documentTable`.
 *
 * - When a stored copy exists it is returned immediately and refreshed from
 *   the server in the background.
 * - Otherwise the document is fetched from the server once.
 *
 * @returns the document text, or `undefined` when the server has no content
 * for `name`.
 */
export async function getDocument(name: string): Promise<string | undefined> {
  const documents = await db.select().from(documentTable).where(
    eq(documentTable.name, name),
  ).limit(1);

  const cached = documents[0]?.content;
  if (cached) {
    // Serve the stored copy now; refresh it without blocking the caller.
    void refreshDocument(name).catch(log.error);
    return cached;
  }

  return refreshDocument(name);
}

/**
 * Fetches `name` from the server and syncs the local table with the result:
 * updates the stored content, or removes the row when nothing was returned.
 */
async function refreshDocument(name: string): Promise<string | undefined> {
  const content = await fetchDocument(name);
  // Query builders only run when awaited, so both statements below must be
  // awaited to actually reach the database.
  if (!content) {
    await db.delete(documentTable).where(eq(documentTable.name, name));
    return;
  }
  await updateDocument(name, content);
  return content;
}
/**
 * Builds the query that stores `content` for the row of `documentTable`
 * whose name equals `name`. The query is returned to the caller.
 */
export function updateDocument(name: string, content: string) {
  const matchesName = eq(documentTable.name, name);
  const statement = db.update(documentTable).set({ content });
  return statement.where(matchesName);
}
export function transformDocument(input: string, cb: (r: Root) => Root) {
const out = unified()
.use(remarkParse)

View File

@ -1,7 +1,9 @@
import OpenAI from "https://deno.land/x/openai@v4.52.0/mod.ts";
import OpenAI from "https://deno.land/x/openai@v4.69.0/mod.ts";
import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zod.ts";
import { OPENAI_API_KEY } from "@lib/env.ts";
import { hashString } from "@lib/helpers.ts";
import { createCache } from "@lib/cache.ts";
import recipeSchema from "@lib/recipeSchema.ts";
const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY });
@ -208,3 +210,21 @@ export async function createTags(content: string) {
return extractListFromResponse(res).map((v) => v.replaceAll(" ", "-"));
}
/**
 * Asks the OpenAI chat completion API to extract a structured recipe from
 * markdown, using `recipeSchema` as the response format.
 *
 * @param content markdown text of the page to extract from
 * @returns the recipe validated by `recipeSchema`, or `undefined` when no
 * OpenAI client is configured
 */
export async function extractRecipe(content: string) {
  if (!openAI) return;

  const responseFormat = zodResponseFormat(recipeSchema, "recipe-v2");
  const response = await openAI.beta.chat.completions.parse({
    model: "gpt-4o-2024-08-06",
    temperature: 0.1,
    messages: [
      {
        role: "system",
        content: "Extract the recipe information from the provided markdown.",
      },
      { role: "user", content },
    ],
    response_format: responseFormat,
  });

  const [firstChoice] = response.choices;
  // Validate once more locally; throws when the payload does not match.
  return recipeSchema.parse(firstChoice.message.parsed);
}

35
lib/parseIngredient.ts Normal file
View File

@ -0,0 +1,35 @@
import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
/**
 * Parses a single ingredient line (e.g. "2 EL sugar") into the shape used by
 * the recipe schema: `name`, `unit`, `quantity` (as string) and an empty
 * `note`.
 *
 * Extra units of measure (EL, TL, L, Paket and their alternates) are
 * registered on top of the library defaults. When nothing can be parsed
 * (for example an empty line), the trimmed input is returned as the name
 * with empty unit and quantity instead of throwing.
 */
export function parseIngredient(text: string) {
  const [parsed] = _parseIngredient(text, {
    additionalUOMs: {
      tableSpoon: {
        short: "EL",
        plural: "Table Spoons",
        alternates: ["el", "EL", "Tbsp", "tbsp"],
      },
      teaSpoon: {
        short: "TL",
        plural: "Tea Spoon",
        alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
      },
      litre: {
        short: "L",
        plural: "liters",
        alternates: ["L", "l"],
      },
      paket: {
        short: "Paket",
        plural: "Pakets",
        alternates: ["Paket", "paket"],
      },
    },
  });

  // The parser yields no entries for blank input; avoid indexing into an
  // empty result.
  if (!parsed) {
    return {
      name: text.trim(),
      unit: "",
      quantity: "",
      note: "",
    };
  }

  return {
    name: parsed.description,
    unit: parsed.unitOfMeasure || "",
    quantity: parsed.quantity?.toString() || "",
    note: "",
  };
}

55
lib/playwright.ts Normal file
View File

@ -0,0 +1,55 @@
import { firefox } from "npm:playwright-extra";
import { createStreamResponse } from "@lib/helpers.ts";
import StealthPlugin from "npm:puppeteer-extra-plugin-stealth";
// Pool of user agent strings; fetchHtmlWithPlaywright picks one at random for
// each browser context it creates.
// NOTE(review): these are Chrome user agents while the launched browser is
// Firefox — confirm this mismatch is intended.
const userAgentStrings = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.2227.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.3497.92 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
];
// Register the stealth plugin once at module load, before any launch.
firefox.use(StealthPlugin());
/**
 * Loads `fetchUrl` in a headless Firefox instance and returns the page HTML.
 *
 * Progress messages are pushed to `streamResponse`. Any error while loading
 * the page is logged and reported on the stream, and an empty string is
 * returned. The browser is always closed before returning.
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  streamResponse.enqueue("booting up playwright");
  const browser = await firefox.launch();
  streamResponse.enqueue("fetching html");

  try {
    // Use a randomly chosen user agent for this browsing context.
    const randomIndex = Math.floor(Math.random() * userAgentStrings.length);
    const context = await browser.newContext({
      userAgent: userAgentStrings[randomIndex],
    });

    // Hide the automation flag from scripts running in the page.
    await context.addInitScript(
      "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})",
    );

    const page = await context.newPage();
    // Only wait for the DOM, not for every sub-resource.
    await page.goto(fetchUrl, { waitUntil: "domcontentloaded" });

    return await page.content();
  } catch (error) {
    streamResponse.enqueue("error fetching html");
    console.error(error);
    return "";
  } finally {
    await browser.close();
  }
}

39
lib/recipeSchema.ts Normal file
View File

@ -0,0 +1,39 @@
import { z } from "npm:zod";
// Field definitions for one ingredient line. Every field is a string; the
// descriptions document the expected content of each.
const ingredientShape = {
  quantity: z.string().describe(
    "e.g., '2', '1/2', or an empty string for 'to taste'",
  ),
  unit: z.string().describe('e.g., "g", "tbsp", "cup"'),
  name: z.string().describe('e.g., "sugar", "flour"'),
  note: z.string().describe('optional, e.g., "sifted", "chopped finely"'),
};

/** Schema of a single ingredient (quantity, unit, name, note). */
export const IngredientSchema = z.object(ingredientShape);
/** A single ingredient as validated by `IngredientSchema`. */
export type Ingredient = z.infer<typeof IngredientSchema>;
// A named group of ingredients: a group name plus its ingredient items.
const ingredientGroupShape = {
  name: z.string(),
  items: z.array(IngredientSchema),
};

/** Schema of a named ingredient group. */
export const IngredientGroupSchema = z.object(ingredientGroupShape);
/** An ingredient group as validated by `IngredientGroupSchema`. */
export type IngredientGroup = z.infer<typeof IngredientGroupSchema>;
// An entry of the ingredient list is either a single ingredient or a group.
const ingredientEntrySchema = z.union([
  IngredientSchema,
  IngredientGroupSchema,
]);

/**
 * Schema of a complete recipe. The field descriptions are part of the schema
 * and explain what each field should contain.
 */
const recipeSchema = z.object({
  title: z.string().describe(
    "Title of the Recipe, without the name of the website or author",
  ),
  image: z.string().describe("URL of the main image of the recipe"),
  author: z.string().describe("author of the Recipe (optional)"),
  description: z.string().describe("Optional, short description of the recipe"),
  ingredients: z.array(ingredientEntrySchema)
    .describe("List of ingredients"),
  instructions: z.array(z.string()).describe("List of instructions"),
  servings: z.number().describe("Amount of Portions"),
  prepTime: z.number().describe("Preparation time in minutes"),
  cookTime: z.number().describe("Cooking time in minutes"),
  totalTime: z.number().describe("Total time in minutes"),
  tags: z.array(z.string()).describe(
    "List of tags (e.g., ['vegan', 'dessert'])",
  ),
  notes: z.array(z.string()).describe("Optional notes about the recipe"),
});

export default recipeSchema;

View File

@ -4,31 +4,22 @@ import {
getTextOfRange,
parseDocument,
} from "@lib/documents.ts";
import { parse } from "yaml";
import { parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
import { parse, stringify } from "yaml";
import { createCrud } from "@lib/crud.ts";
import { extractHashTags } from "@lib/string.ts";
export type IngredientGroup = {
name: string;
ingredients: Ingredient[];
};
export type Ingredient = {
type: string;
unit?: string;
amount?: string;
};
export type Ingredients = (Ingredient | IngredientGroup)[];
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
import { fixRenderedMarkdown } from "@lib/helpers.ts";
import { parseIngredient } from "@lib/parseIngredient.ts";
export type Recipe = {
type: "recipe";
id: string;
name: string;
description?: string;
ingredients: Ingredients;
preparation?: string;
markdown?: string;
ingredients: (Ingredient | IngredientGroup)[];
instructions?: string[];
notes?: string[];
tags: string[];
meta?: {
time?: string;
@ -49,38 +40,8 @@ function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined {
const text = children.map((c) => getTextOfChild(c)).join(" ").trim();
const ing = parseIngredient(text, {
additionalUOMs: {
tableSpoon: {
short: "EL",
plural: "Table Spoons",
alternates: ["el", "EL", "Tbsp", "tbsp"],
},
teaSpoon: {
short: "TL",
plural: "Tea Spoon",
alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
},
litre: {
short: "L",
plural: "liters",
alternates: ["L", "l"],
},
paket: {
short: "Paket",
plural: "Pakets",
alternates: ["Paket", "paket"],
},
},
});
return {
type: ing[0].description,
unit: ing[0].unitOfMeasure,
amount: ing[0].quantity,
};
return parseIngredient(text);
}
return;
}
const isIngredient = (item: Ingredient | undefined): item is Ingredient => {
@ -112,9 +73,10 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
if (!nextChild || nextChild.type !== "list") continue;
const name = getTextOfChild(child);
ingredients.push({
name: getTextOfChild(child) || "",
ingredients: parseIngredientsList(nextChild),
name: name || "",
items: parseIngredientsList(nextChild),
});
skip = true;
continue;
@ -128,6 +90,19 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
return ingredients;
}
/**
 * Splits instruction text into individual steps.
 *
 * The text is split on `seperator` (by default: a newline that is followed
 * by "<number>."), and every chunk that starts with "<number>." contributes
 * its text without the numbering. Chunks that are not numbered, or whose
 * text is empty, are dropped.
 *
 * A step may span several lines; its full text is kept, and surrounding
 * whitespace is removed.
 */
function extractSteps(
  content: string,
  seperator: RegExp = /\n(?=\d+\.)/g,
): string[] {
  return content
    .split(seperator)
    .map((chunk) => {
      // [\s\S] instead of "." so continuation lines of a step are kept.
      const match = chunk.trim().match(/^\d+\.\s*([\s\S]*)$/);
      return match?.[1]?.trim();
    })
    .filter((step): step is string => !!step);
}
export function parseRecipe(original: string, id: string): Recipe {
const doc = parseDocument(original);
@ -140,8 +115,8 @@ export function parseRecipe(original: string, id: string): Recipe {
if (child.type === "yaml") {
try {
meta = parse(child.value) as Recipe["meta"];
} catch (_) {
// console.log("Error parsing YAML", err);
} catch (err) {
console.log("Error parsing YAML", err);
}
continue;
}
@ -168,7 +143,14 @@ export function parseRecipe(original: string, id: string): Recipe {
const ingredients = parseIngredients(groups[1]);
const preparation = getTextOfRange(groups[2], original);
const instructionText = getTextOfRange(groups[2], original);
let instructions = extractSteps(instructionText || "");
if (instructions.length <= 1) {
const d = extractSteps(instructionText || "", /\n/g);
if (d.length > instructions.length) {
instructions = d;
}
}
const tags = extractHashTags(description || "");
if (description) {
@ -183,15 +165,88 @@ export function parseRecipe(original: string, id: string): Recipe {
meta,
name,
tags,
markdown: original,
notes: getTextOfRange(groups[3], original)?.split("\n"),
description,
ingredients,
preparation,
instructions,
};
}
/**
 * Returns a shallow copy of `obj` without the entries whose value is
 * `undefined`. Other falsy values (null, 0, "") are kept.
 */
function filterUndefinedFromObject<T extends { [key: string]: unknown }>(
  obj: T,
) {
  const kept: [string, unknown][] = [];
  for (const entry of Object.entries(obj)) {
    if (entry[1] !== undefined) {
      kept.push(entry);
    }
  }
  return Object.fromEntries(kept);
}
/**
 * Renders a recipe as a markdown document: YAML front matter built from
 * `recipe.meta` (without `thumbnail` and `average`), title, image, hashtags,
 * description, the ingredient list, numbered instructions and notes.
 *
 * Ingredients are rendered as `- **<quantity><unit>** <name>`. The bold
 * amount is emitted whenever a quantity or a unit is present, so an
 * ingredient such as "2 eggs" (quantity without unit) keeps its quantity.
 */
export function renderRecipe(recipe: Recipe) {
  const meta = filterUndefinedFromObject(recipe.meta || {});

  // Clean up meta properties
  delete meta.thumbnail;
  delete meta.average;

  const recipeImage = meta.image ? `![](${meta.image})` : "";

  // Formats one ingredient as a markdown list item.
  const formatIngredient = (
    ing: { quantity?: string; unit?: string; name: string },
  ) => {
    const amount = `${ing.quantity?.trim() || ""}${ing.unit?.trim() || ""}`;
    return amount ? `- **${amount}** ${ing.name}` : `- ${ing.name}`;
  };

  // Format ingredient groups and standalone ingredients
  const ingredients = recipe.ingredients
    .map((item) => {
      if ("items" in item) {
        const lines = item.items.map((ing) => formatIngredient(ing));
        return `\n*${item.name}*\n${lines.join("\n")}`;
      }
      return formatIngredient(item);
    })
    .join("\n");

  // Format instructions as a numbered list
  const instructions = recipe.instructions
    ? recipe.instructions.map((step, i) => `${i + 1}. ${step}`).join("\n")
    : "";

  // Render the final markdown
  return fixRenderedMarkdown(`${
    Object.keys(meta).length
      ? `---
${stringify(meta)}
---`
      : `---
---`
  }
# ${recipe.name}
${recipe.meta?.image ? recipeImage : ""}
${recipe.tags.map((t) => `#${t.replaceAll(" ", "-")}`).join(" ")}
${recipe.description || ""}
---
${ingredients ? `## Ingredients\n\n${ingredients}\n\n---\n` : ""}
${instructions ? `${instructions}\n\n---` : ""}
${recipe.notes?.length ? `\n${recipe.notes.join("\n")}` : ""}
`);
}
// CRUD helpers for recipes: documents live under the `Recipes/` prefix, are
// read with parseRecipe and written with renderRecipe; thumbnails are enabled.
const crud = createCrud<Recipe>({
prefix: `Recipes/`,
parse: parseRecipe,
render: renderRecipe,
hasThumbnails: true,
});

View File

@ -6,17 +6,10 @@ export function formatDate(date: Date): string {
}
/**
 * Turns an arbitrary string into a file-name-safe form: lowercased, spaces
 * replaced by underscores, and every character other than word characters,
 * dots and dashes removed.
 */
export function safeFileName(inputString: string): string {
  return inputString
    .toLowerCase()
    .replace(/ /g, "_")
    .replace(/[^\w.-]/g, "")
    .replace(/:/g, "");
}

View File

@ -1,5 +1,5 @@
import { Head } from "$fresh/runtime.ts";
import Image from "@components/Image.tsx";
import { MainLayout } from "@components/layouts/main.tsx";
export default function Error404() {
return (
@ -7,22 +7,17 @@ export default function Error404() {
<Head>
<title>404 - Page not found</title>
</Head>
<div class="px-4 py-8 mx-auto bg-[#86efac]">
<div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
<Image
class="my-6"
src="/logo.svg"
width="128"
height="128"
alt="the fresh logo: a sliced lemon dripping with juice"
/>
<h1 class="text-4xl font-bold">404 - Page not found</h1>
<p class="my-4">
The page you were looking for doesn't exist.
</p>
<a href="/" class="underline">Go back home</a>
<MainLayout>
<div class="px-8 text-white mt-10">
<div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
<h1 class="text-4xl font-bold">404 - Page not found</h1>
<p class="my-4">
The page you were looking for doesn't exist.
</p>
<a href="/" class="underline">Go back home</a>
</div>
</div>
</div>
</MainLayout>
</>
);
}

View File

@ -1,6 +1,6 @@
import { Handlers } from "$fresh/server.ts";
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
import { DOMParser } from "domparser";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";

12
routes/api/cache.ts Normal file
View File

@ -0,0 +1,12 @@
import { Handlers } from "$fresh/server.ts";
import { documentTable } from "@lib/db/schema.ts";
import { db } from "@lib/db/sqlite.ts";
import { json } from "@lib/helpers.ts";
/**
 * `DELETE` clears every row of the local document table.
 *
 * Because the operation is destructive, it is only performed for requests
 * that carry a session; other requests receive a 401 response.
 */
export const handler: Handlers = {
  async DELETE(_req, ctx) {
    if (!ctx.state.session) {
      return new Response("Unauthorized", { status: 401 });
    }
    await db.delete(documentTable).run();
    return json({ status: "ok" });
  },
};

View File

@ -71,7 +71,6 @@ const POST = async (
if (posterPath && !movie.meta?.image) {
const poster = await tmdb.getMoviePoster(posterPath);
const extension = fileExtension(posterPath);
finalPath = `Media/movies/images/${safeFileName(name)}_cover.${extension}`;
await createDocument(finalPath, poster);
movie.meta = movie.meta || {};

View File

@ -0,0 +1,264 @@
import { Handlers } from "$fresh/server.ts";
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
import { DOMParser } from "domparser";
import { AccessDeniedError, BadRequestError } from "@lib/errors.ts";
import { createStreamResponse, isValidUrl } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.2.0";
import { createLogger } from "@lib/log.ts";
import { createRecipe, Recipe } from "@lib/resource/recipes.ts";
import recipeSchema from "@lib/recipeSchema.ts";
import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts";
import { safeFileName } from "@lib/string.ts";
import { createDocument } from "@lib/documents.ts";
import { parseJsonLdToRecipeSchema } from "./parseJsonLd.ts";
import z from "npm:zod";
import { fetchHtmlWithPlaywright } from "@lib/playwright.ts";
// Shared HTML parser used for the downloaded page and the cleaned article.
const parser = new DOMParser();
// NOTE(review): logger name says "api/article" but this module creates
// recipes — looks copied from the article route; confirm the intended name.
const log = createLogger("api/article");
/**
 * Resolves `src` (as found in a page's markup) against the page `url`.
 *
 * - Absolute http(s) URLs are returned unchanged.
 * - Root-relative, protocol-relative and path-relative references are
 *   resolved against `url`.
 * - If `src` cannot be resolved it is returned as-is.
 */
function makeUrlAbsolute(url: URL, src: string) {
  if (src.startsWith("https://") || src.startsWith("http://")) {
    return src;
  }
  try {
    return new URL(src, url).href;
  } catch (_) {
    return src;
  }
}
/**
 * Fallback extraction: reduces the page to its readable content, converts it
 * to markdown (with absolute image and link URLs) and asks OpenAI to extract
 * a recipe from it.
 *
 * @returns the extracted recipe, or `undefined` when there is no document,
 * no readable content, or the OpenAI extraction yields nothing
 */
async function extractUsingAI(
  url: URL,
  document: Parameters<typeof Readability>[0] | null,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  if (!document) return;

  const result = new Readability(document).parse();
  // parse() can come back empty when no article content is detected.
  if (!result?.content) return;

  const service = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  // Resolves a link target against the page URL; absolute http(s) targets
  // are left untouched.
  const resolveHref = (href: string) => {
    if (href.startsWith("https://") || href.startsWith("http://")) {
      return href;
    }
    try {
      return new URL(href, url).href;
    } catch (_) {
      return href;
    }
  };

  service.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: HTMLImageElement) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      // Inline data images carry no useful URL for the markdown.
      if (!src || src.startsWith("data:image")) return "";

      return `![${alt}](${makeUrlAbsolute(url, src)})`;
    },
  });
  service.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: HTMLElement) {
      const href = node.getAttribute("href");
      if (!href) return content;

      // Drop in-page anchors that have (almost) no visible text.
      if (href.startsWith("#") && content.length < 2) return "";

      return `[${content}](${resolveHref(href)})`;
    },
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  const markdown = service.turndown(cleanDocument);

  streamResponse.enqueue("extracting recipe with openai");
  log.debug("converted page to markdown", { markdown });

  const recipe = await openai.extractRecipe(markdown);
  log.debug("extracted recipe", { recipe });

  return recipe;
}
/**
 * Picks the `src` of the largest image, judged by the sum of its `width` and
 * `height` attributes. Images without a `src` or with inline `data:` sources
 * are ignored. Returns `undefined` when no image qualifies.
 */
function pickLargestImageSrc(images: HTMLImageElement[]): string | undefined {
  let bestSrc: string | undefined;
  let bestSize = -1;
  for (const img of images) {
    const src = img.getAttribute("src");
    if (!src || src.startsWith("data:")) continue;
    const size = (Number(img.getAttribute("width")) || 0) +
      (Number(img.getAttribute("height")) || 0);
    if (size > bestSize) {
      bestSize = size;
      bestSrc = src;
    }
  }
  return bestSrc;
}

/**
 * Creates a recipe document from a web page.
 *
 * Steps: download the page, try to read a recipe from its JSON-LD blocks,
 * fall back to AI extraction, pick and store a cover image, then create the
 * recipe document. Progress is reported on `streamResponse`; the final chunk
 * `id: <id>` tells the client where the new recipe lives.
 *
 * The stream itself is not closed here; the caller is expected to do that.
 *
 * @returns the created recipe, or `undefined` when the page could not be
 * downloaded or no recipe could be extracted
 */
async function processCreateRecipeFromUrl(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
): Promise<Recipe | undefined> {
  log.info("create recipe from url", { url: fetchUrl });

  const url = new URL(fetchUrl);

  streamResponse.enqueue("downloading article");

  const html = await fetchHtmlWithPlaywright(fetchUrl, streamResponse);
  // An empty string means the download failed; nothing to parse.
  if (!html) {
    streamResponse.enqueue("failed to download article");
    return;
  }

  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");

  const title = document?.querySelector("title")?.innerText;

  const images: HTMLImageElement[] = [];
  document?.querySelectorAll("img").forEach((img) => {
    images.push(img as unknown as HTMLImageElement);
  });

  const metaAuthor =
    document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document?.querySelector('meta[name="author"]')?.getAttribute("content");

  const jsonLds = Array.from(
    (document?.querySelectorAll(
      "script[type='application/ld+json']",
    ) ?? []) as HTMLScriptElement[],
  );

  // Prefer structured data embedded in the page; the first block that
  // yields a recipe wins.
  let recipe: z.infer<typeof recipeSchema> | undefined = undefined;
  for (const jsonLd of jsonLds) {
    recipe = parseJsonLdToRecipeSchema(jsonLd.textContent || "");
    if (recipe) break;
  }

  if (!recipe) {
    recipe = await extractUsingAI(url, document, streamResponse);
  }

  const name = recipe?.title || title || "";
  const id = name.replaceAll(" ", "-");

  if (!recipe || !id) {
    console.error("Failed to parse recipe");
    streamResponse.enqueue("failed to parse recipe");
    return;
  }

  // Without an image from the extraction, fall back to the largest image
  // found on the page (if any).
  if (!recipe.image) {
    const fallbackSrc = pickLargestImageSrc(images);
    if (fallbackSrc) {
      recipe.image = makeUrlAbsolute(url, fallbackSrc);
    }
  }

  const newRecipe: Recipe = {
    type: "recipe",
    id,
    name,
    description: recipe.description,
    ingredients: recipe.ingredients || [],
    instructions: recipe.instructions || [],
    notes: recipe.notes,
    tags: recipe.tags || [],
    meta: {
      image: recipe.image,
      time: recipe.totalTime
        ? `${recipe.totalTime.toString()} minutes`
        : undefined,
      link: fetchUrl,
      portion: recipe.servings,
      // `||` so an empty meta tag does not hide the extracted author.
      author: metaAuthor || recipe.author,
    },
  };

  if (newRecipe.meta?.image) {
    const src = makeUrlAbsolute(url, newRecipe.meta.image);
    if (src?.length > 5) {
      streamResponse.enqueue("downloading image");
      // Best effort: on any failure the recipe keeps the remote image URL.
      try {
        const extension = fileExtension(new URL(src).pathname);
        const finalPath = `Media/articles/images/${
          safeFileName(id)
        }_cover.${extension}`;
        const res = await fetch(src);
        if (!res.ok) {
          throw new Error(`image request failed with status ${res.status}`);
        }
        streamResponse.enqueue("saving image");
        const buffer = await res.arrayBuffer();
        await createDocument(finalPath, buffer);
        newRecipe.meta.image = finalPath;
      } catch (err) {
        console.log("Failed to save image", err);
      }
    }
  }

  streamResponse.enqueue("finished processing, creating file");
  log.debug("creating recipe", { id: newRecipe.id });

  await createRecipe(newRecipe.id, newRecipe);

  streamResponse.enqueue("id: " + newRecipe.id);
  return newRecipe;
}
/**
 * Route handler that creates a recipe from a remote page.
 *
 * `GET` requires `ctx.state.session`, reads the `url` query parameter and
 * validates it with `isValidUrl`. It then starts
 * `processCreateRecipeFromUrl` without awaiting it and returns the streaming
 * response right away, so the progress messages the worker enqueues can reach
 * the client while the work is still running.
 *
 * Throws `AccessDeniedError` when there is no session and `BadRequestError`
 * when the `url` parameter is missing or fails validation.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    if (!ctx.state.session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!(fetchUrl && isValidUrl(fetchUrl))) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Background job: failures are logged rather than thrown, and the stream
    // is always closed once the job settles.
    const runInBackground = async () => {
      try {
        const article = await processCreateRecipeFromUrl({
          fetchUrl,
          streamResponse,
        });
        // NOTE(review): the message says "article" although this endpoint
        // creates a recipe — looks copied from the article route; confirm.
        log.debug("created article from link", { article });
      } catch (err) {
        log.error(err);
      } finally {
        streamResponse.cancel();
      }
    };
    runInBackground();

    return streamResponse.response;
  },
};

View File

@ -0,0 +1,103 @@
import recipeSchema from "@lib/recipeSchema.ts";
import { parseIngredient } from "@lib/parseIngredient.ts";
/**
 * Parses the text content of one `application/ld+json` script tag into a
 * recipe object validated by `recipeSchema`.
 *
 * The Recipe node is looked up in the shapes JSON-LD commonly takes on recipe
 * pages: a bare Recipe object, a node wrapping it in `mainEntity`, a
 * top-level array of nodes, or a document with an `@graph` array. `@type` may
 * be the string `"Recipe"` or an array containing it.
 *
 * @param jsonLdContent Raw JSON text of the script tag.
 * @returns The validated recipe, or `undefined` when the text is not valid
 *   JSON, holds no Recipe node, or fails schema validation.
 */
export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
  try {
    const root = JSON.parse(jsonLdContent);

    const data = findRecipeNode(root);
    if (!data) {
      return;
    }

    // An image declared on the outermost node keeps priority over the
    // recipe's own image, as it did before nested lookups were supported.
    const outerImage = root?.image;

    // Map and parse ingredients into the new schema
    const ingredients = (data.recipeIngredient || []).map(
      parseIngredient,
    );

    // Steps may be plain strings, HowToStep objects or HowToSection groups.
    const instructions = collectInstructions(data.recipeInstructions)
      .filter((step) => step.trim() !== "");

    // Parse servings
    const servings = parseServings(data.recipeYield);

    // Parse times
    const prepTime = parseDuration(data.prepTime);
    const cookTime = parseDuration(data.cookTime);
    const totalTime = parseDuration(data.totalTime);

    // Extract tags: either an array or a comma separated string
    const tags = Array.isArray(data.keywords)
      ? data.keywords
      : typeof data.keywords === "string" && data.keywords !== ""
      ? data.keywords.split(",").map((tag: string) => tag.trim())
      : [];

    const author = Array.isArray(data.author)
      ? data.author.map(authorName).filter(Boolean).join(", ")
      : authorName(data.author);

    // Build the recipe object
    const recipe = {
      title: data.name || "Unnamed Recipe",
      image: pickImage(outerImage || data.image || ""),
      author,
      description: data.description || "",
      ingredients,
      instructions,
      servings,
      prepTime,
      cookTime,
      totalTime,
      tags,
      notes: data.notes || [],
    };

    // Validate against the schema
    return recipeSchema.parse(recipe);
  } catch (error) {
    console.error("Invalid JSON-LD content or parsing error:", error);
    return undefined;
  }
}

/**
 * Depth-first search for the first node typed as a Recipe. `mainEntity` is
 * followed before the node itself is checked, then `@graph` is searched.
 */
// deno-lint-ignore no-explicit-any
function findRecipeNode(node: any): any {
  if (!node || typeof node !== "object") return undefined;

  if (Array.isArray(node)) {
    for (const entry of node) {
      const hit = findRecipeNode(entry);
      if (hit) return hit;
    }
    return undefined;
  }

  const nested = findRecipeNode(node["mainEntity"]);
  if (nested) return nested;

  const type = node["@type"];
  const isRecipe = type === "Recipe" ||
    (Array.isArray(type) && type.includes("Recipe"));
  if (isRecipe) return node;

  return findRecipeNode(node["@graph"]);
}

/**
 * Flattens `recipeInstructions` into a list of step texts, descending into
 * `itemListElement` for grouped steps. Unsupported values contribute nothing.
 */
function collectInstructions(value: unknown): string[] {
  if (typeof value === "string") return [value];
  if (Array.isArray(value)) return value.flatMap(collectInstructions);
  if (value && typeof value === "object") {
    const step = value as { text?: unknown; itemListElement?: unknown };
    if (typeof step.text === "string") return [step.text];
    return collectInstructions(step.itemListElement);
  }
  return [];
}

/** Reads an author given either as a plain string or as `{ name }`. */
function authorName(author: unknown): string {
  if (typeof author === "string") return author;
  if (author && typeof author === "object") {
    const name = (author as { name?: unknown }).name;
    return typeof name === "string" ? name : "";
  }
  return "";
}
/**
 * Picks a single image URL from a JSON-LD `image` value.
 *
 * Accepts a URL string, an object carrying `url` or `contentUrl`, or an array
 * of either (the first entry is used).
 *
 * @param images The raw `image` value from the JSON-LD node.
 * @returns The image URL, or `""` when none can be determined.
 */
function pickImage(images: unknown): string {
  const first = Array.isArray(images) ? images[0] : images;

  if (typeof first === "string") {
    return first;
  }

  if (first && typeof first === "object") {
    const { url, contentUrl } = first as {
      url?: unknown;
      contentUrl?: unknown;
    };
    if (typeof url === "string") return url;
    if (typeof contentUrl === "string") return contentUrl;
  }

  // Empty array or unsupported shape: "" is the value callers already use
  // for "no image".
  return "";
}
/**
 * Extracts the number of servings from a JSON-LD `recipeYield` value.
 *
 * Numbers are used as-is, strings contribute their first run of digits
 * ("Serves 6" -> 6), and arrays are scanned in order for the first entry that
 * yields a number (["4", "4 servings"] -> 4).
 *
 * @param servingsData The raw `recipeYield` value.
 * @returns The parsed servings, or 1 when nothing usable is found.
 */
function parseServings(servingsData: unknown): number {
  const candidates = Array.isArray(servingsData)
    ? servingsData
    : [servingsData];

  for (const candidate of candidates) {
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
    if (typeof candidate === "string") {
      const match = candidate.match(/\d+/);
      if (match) return parseInt(match[0], 10);
    }
  }

  return 1;
}
/**
 * Converts an ISO 8601 duration into whole minutes.
 *
 * Handles the time part (`PT1H30M`, `PT90S`) as well as durations with a
 * date part, such as `P0DT1H30M`, `P1DT2H` or `P0Y0M0DT0H30M0.000S`. Weeks,
 * days, hours and minutes are summed; seconds are truncated to whole
 * minutes. Year and month components are accepted but not counted, since
 * they have no fixed length in minutes.
 *
 * @param duration ISO 8601 duration string, or `undefined`.
 * @returns Total minutes, or 0 when the input is missing or not a duration.
 */
function parseDuration(duration: string | undefined): number {
  if (typeof duration !== "string" || duration === "") return 0;

  // The lookahead requires a digit or "T" right after "P" so that a stray
  // capital P in free text is not mistaken for an (empty) duration.
  const match = duration.match(
    /P(?=\d|T)(?:\d+Y)?(?:\d+M)?(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?)?/,
  );
  if (!match) return 0;

  const weeks = match[1] ? parseInt(match[1], 10) : 0;
  const days = match[2] ? parseInt(match[2], 10) : 0;
  const hours = match[3] ? parseInt(match[3], 10) : 0;
  const minutes = match[4] ? parseInt(match[4], 10) : 0;
  const seconds = match[5] ? parseFloat(match[5]) : 0;

  return (weeks * 7 + days) * 24 * 60 +
    hours * 60 +
    minutes +
    Math.floor(seconds / 60);
}

View File

@ -2,20 +2,65 @@ import { Handlers, PageProps } from "$fresh/server.ts";
import { IngredientsList } from "@islands/IngredientsList.tsx";
import { MainLayout } from "@components/layouts/main.tsx";
import Counter from "@islands/Counter.tsx";
import { useSignal } from "@preact/signals";
import { Signal, useSignal } from "@preact/signals";
import { getRecipe, Recipe } from "@lib/resource/recipes.ts";
import { RedirectSearchHandler } from "@islands/Search.tsx";
import { KMenu } from "@islands/KMenu.tsx";
import PageHero from "@components/PageHero.tsx";
import { Star } from "@components/Stars.tsx";
import { renderMarkdown } from "@lib/documents.ts";
/**
 * Page handler for a single recipe.
 *
 * Looks up the recipe named by the `name` route parameter and renders the
 * page with it and the current session. Responds with the not-found page
 * when the lookup returns nothing or when loading/rendering throws.
 */
export const handler: Handlers<{ recipe: Recipe; session: unknown } | null> = {
  async GET(_, ctx) {
    // The lookup must live inside the try block: an unguarded
    // `getRecipe` + `return ctx.render(...)` ahead of it would make the
    // not-found handling below unreachable.
    try {
      const recipe = await getRecipe(ctx.params.name);
      if (!recipe) {
        return ctx.renderNotFound();
      }
      return ctx.render({ recipe, session: ctx.state.session });
    } catch (_e) {
      return ctx.renderNotFound();
    }
  },
};
/**
 * Tells whether a recipe has enough structured data to be rendered as a
 * structured recipe: at least one ingredient, at least one instruction and a
 * non-empty name.
 *
 * The result is meant to be used for its truthiness: it is the first falsy
 * length encountered (or `undefined` when a field is missing), otherwise the
 * length of the name.
 */
function isValidRecipe(recipe: Recipe | null) {
  const ingredientCount = recipe?.ingredients?.length;
  if (!ingredientCount) return ingredientCount;

  const instructionCount = recipe?.instructions?.length;
  if (!instructionCount) return instructionCount;

  return recipe?.name?.length;
}
/**
 * Body of a structured recipe: an "Ingredients" heading with a portion
 * counter, the ingredient list, and a numbered "Preparation" list.
 *
 * @param props.recipe Recipe whose ingredients and instructions are shown.
 * @param props.amount Signal handed to both the counter and the ingredient
 *   list.
 * @param props.portion Portion value passed to the ingredient list; the
 *   counter is only rendered when it is truthy.
 */
function ValidRecipe(
  props: { recipe: Recipe; amount: Signal<number>; portion: number },
) {
  const { recipe, amount, portion } = props;

  const portionCounter = portion && <Counter count={amount} />;

  // One list item per instruction; each step is run through renderMarkdown
  // and injected as HTML.
  const steps = recipe.instructions &&
    recipe.instructions.map((step) => (
      <li dangerouslySetInnerHTML={{ __html: renderMarkdown(step) }} />
    ));

  return (
    <>
      <div class="flex items-center gap-8">
        <h3 class="text-3xl my-5">Ingredients</h3>
        {portionCounter}
      </div>

      <IngredientsList
        ingredients={recipe.ingredients}
        amount={amount}
        portion={portion}
      />

      <h3 class="text-3xl my-5">Preparation</h3>
      <ol class="list-decimal grid gap-4">
        {steps}
      </ol>
    </>
  );
}
export default function Greet(
props: PageProps<{ recipe: Recipe; session: Record<string, string> }>,
) {
@ -47,7 +92,9 @@ export default function Greet(
)}
</PageHero.Header>
<PageHero.Footer>
<PageHero.Title>{recipe.name}</PageHero.Title>
<PageHero.Title link={recipe.meta?.link}>
{recipe.name}
</PageHero.Title>
<PageHero.Subline
entries={subline}
>
@ -55,23 +102,23 @@ export default function Greet(
</PageHero.Subline>
</PageHero.Footer>
</PageHero>
<div class="px-8 text-white mt-10">
<div class="flex items-center gap-8">
<h3 class="text-3xl my-5">Ingredients</h3>
{portion && <Counter count={amount} />}
</div>
<IngredientsList
ingredients={recipe.ingredients}
amount={amount}
portion={portion}
/>
<h3 class="text-3xl my-5">Preparation</h3>
<pre
class="whitespace-break-spaces"
dangerouslySetInnerHTML={{ __html: recipe.preparation || "" }}
>
{recipe.preparation}
</pre>
{isValidRecipe(recipe)
? (
<ValidRecipe
recipe={recipe}
amount={amount}
portion={portion || 1}
/>
)
: (
<div
dangerouslySetInnerHTML={{
__html: renderMarkdown(recipe?.markdown || ""),
}}
/>
)}
</div>
</MainLayout>
);