fix: make recipe crawling work

This commit is contained in:
Max Richter
2025-11-12 15:41:30 +01:00
parent 92126882b6
commit 7ad08daf80
19 changed files with 44 additions and 55 deletions

View File

@@ -10,5 +10,5 @@ export const Emoji = (props: { class?: string; name: string }) => {
/> />
) )
: <span>{props.name}</span> : <span>{props.name}</span>
: <></>; : null;
}; };

View File

@@ -18,8 +18,9 @@ export const IngredientGroupSchema = z.object({
export type IngredientGroup = z.infer<typeof IngredientGroupSchema>; export type IngredientGroup = z.infer<typeof IngredientGroupSchema>;
const recipeSchema = z.object({ const recipeSchema = z.object({
_type: z.literal("Recipe"),
name: z.string().describe( name: z.string().describe(
"Title of the Recipe, without the name of the website or author", "Name of the Recipe, without the name of the website or author",
), ),
description: z.string().describe( description: z.string().describe(
"Optional, short description of the recipe", "Optional, short description of the recipe",
@@ -41,11 +42,7 @@ const recipeSchema = z.object({
export type Recipe = z.infer<typeof recipeSchema>; export type Recipe = z.infer<typeof recipeSchema>;
const noRecipeSchema = z.object({ const noRecipeSchema = z.literal("none").describe("No Recipe found");
errorMessages: z.array(z.string()).describe(
"List of error messages, if no recipe was found",
),
});
export const recipeResponseSchema = z.union([recipeSchema, noRecipeSchema]); export const recipeResponseSchema = z.union([recipeSchema, noRecipeSchema]);

View File

@@ -1,26 +1,26 @@
export const resources = { export const resources = {
"home": { "home": {
emoji: "House with Garden.png", emoji: "home_icon.png",
name: "Home", name: "Home",
link: "/", link: "/",
}, },
"recipe": { "recipe": {
emoji: "Fork and Knife with Plate.png", emoji: "recipes_icon.png",
name: "Recipes", name: "Recipes",
link: "/recipes", link: "/recipes",
}, },
"movie": { "movie": {
emoji: "Popcorn.png", emoji: "movies_icon.png",
name: "Movies", name: "Movies",
link: "/movies", link: "/movies",
}, },
"article": { "article": {
emoji: "Writing Hand Medium-Light Skin Tone.png", emoji: "articles_icon.png",
name: "Articles", name: "Articles",
link: "/articles", link: "/articles",
}, },
"series": { "series": {
emoji: "Television.png", emoji: "tv_series_icon.png",
name: "Series", name: "Series",
link: "/series", link: "/series",
}, },

View File

@@ -17,6 +17,7 @@ export function safeFileName(input: string): string {
.normalize("NFKD") .normalize("NFKD")
.replace(/[\u0300-\u036f]/g, "") .replace(/[\u0300-\u036f]/g, "")
.replace(/[\s-]+/g, "_") .replace(/[\s-]+/g, "_")
.replace(/-+/g, "-")
.replace(/[^A-Za-z0-9_]+/g, "") .replace(/[^A-Za-z0-9_]+/g, "")
.replace(/_+/g, "_") .replace(/_+/g, "_")
// Trim underscores/dots from ends and prevent leading dots // Trim underscores/dots from ends and prevent leading dots

View File

@@ -181,7 +181,7 @@ export async function webScrape(
return { return {
...result, ...result,
dom, dom: dom.window.document,
markdown: turndownService.turndown(result.content), markdown: turndownService.turndown(result.content),
}; };
} }

View File

@@ -37,17 +37,17 @@ async function processCreateRecipeFromUrl(
let recipe: z.infer<typeof recipeSchema> | undefined = undefined; let recipe: z.infer<typeof recipeSchema> | undefined = undefined;
if (jsonLds.length > 0) { if (jsonLds.length > 0) {
for (const jsonLd of jsonLds) { for (const jsonLd of jsonLds) {
recipe = parseJsonLdToRecipeSchema(jsonLd.textContent || ""); if (jsonLd.textContent) {
if (recipe) break; recipe = parseJsonLdToRecipeSchema(jsonLd.textContent);
if (recipe) break;
}
} }
} }
if (!recipe) { if (!recipe) {
const res = await openai.extractRecipe(result.markdown); const res = await openai.extractRecipe(result.markdown);
if (!res || "errorMessages" in res) { if (!res || res === "none") {
const errorMessage = res?.errorMessages?.[0] || streamResponse.error(`failed to extract recipe: ${res}`);
"could not extract recipe";
streamResponse.error(`failed to extract recipe: ${errorMessage}`);
return; return;
} }
recipe = res; recipe = res;
@@ -72,9 +72,7 @@ async function processCreateRecipeFromUrl(
if (newRecipe?.image && newRecipe.image.length > 5) { if (newRecipe?.image && newRecipe.image.length > 5) {
const extension = fileExtension(newRecipe.image); const extension = fileExtension(newRecipe.image);
const finalPath = `resources/recipes/images/${ const finalPath = `recipes/images/${safeFileName(id)}_cover.${extension}`;
safeFileName(id)
}_cover.${extension}`;
streamResponse.info("downloading image"); streamResponse.info("downloading image");
try { try {
streamResponse.info("downloading image"); streamResponse.info("downloading image");
@@ -82,7 +80,7 @@ async function processCreateRecipeFromUrl(
streamResponse.info("saving image"); streamResponse.info("saving image");
const buffer = await res.arrayBuffer(); const buffer = await res.arrayBuffer();
await createResource(finalPath, buffer); await createResource(finalPath, buffer);
newRecipe.image = finalPath; newRecipe.image = `resources/${finalPath}`;
} catch (err) { } catch (err) {
console.log("Failed to save image", err); console.log("Failed to save image", err);
} }

View File

@@ -1,5 +1,4 @@
import recipeSchema from "@lib/recipeSchema.ts"; import recipeSchema, { Recipe } from "@lib/recipeSchema.ts";
import { parseIngredients } from "@lib/parseIngredient.ts";
export function parseJsonLdToRecipeSchema(jsonLdContent: string) { export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
try { try {
@@ -19,12 +18,7 @@ export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
return; return;
} }
// Map and parse ingredients into the new schema const recipeInstructions = Array.isArray(data.recipeInstructions)
const ingredients = parseIngredients(
data?.recipeIngredient?.join("\n") || "",
);
const instructions = Array.isArray(data.recipeInstructions)
? data.recipeInstructions.map((instr: unknown) => { ? data.recipeInstructions.map((instr: unknown) => {
if (!instr) return ""; if (!instr) return "";
if (typeof instr === "string") return instr; if (typeof instr === "string") return instr;
@@ -36,43 +30,41 @@ export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
: []; : [];
// Parse servings // Parse servings
const servings = parseServings(data.recipeYield); const recipeYield = parseServings(data.recipeYield);
// Parse times // Parse times
const prepTime = parseDuration(data.prepTime);
const cookTime = parseDuration(data.cookTime);
const totalTime = parseDuration(data.totalTime); const totalTime = parseDuration(data.totalTime);
// Extract tags // Extract tags
const tags = data.keywords const keywords = data.keywords
? Array.isArray(data.keywords) ? Array.isArray(data.keywords)
? data.keywords ? data.keywords
: data.keywords.split(",").map((tag: string) => tag.trim()) : data.keywords.split(",").map((tag: string) => tag.trim())
: []; : [];
// Build the recipe object // Build the recipe object
const recipe = { const recipe: Recipe = {
_type: "Recipe", _type: "Recipe",
title: data.name || "Unnamed Recipe", name: data.name || "Unnamed Recipe",
image: pickImage(image || data.image || ""), image: pickImage(image || data.image || ""),
author: Array.isArray(data.author) author: {
? data.author.map((a: { name: string }) => a.name).join(", ") "_type": "Person",
: data.author?.name || "", name: Array.isArray(data.author)
? data.author.map((a: { name: string }) => a.name).join(", ")
: data.author?.name || "",
},
description: data.description || "", description: data.description || "",
ingredients, recipeIngredient: data.recipeIngredient,
instructions, recipeInstructions,
servings, recipeYield,
prepTime,
cookTime,
totalTime, totalTime,
tags, keywords,
notes: data.notes || [],
}; };
// Validate against the schema // Validate against the schema
return recipeSchema.parse(recipe); return recipeSchema.parse(recipe);
} catch (error) { } catch (error) {
console.error("Invalid JSON-LD content or parsing error:", error); console.log("Invalid JSON-LD content or parsing error:", error);
return undefined; return undefined;
} }
} }

View File

@@ -49,8 +49,9 @@ export default function Greet(
<RedirectSearchHandler /> <RedirectSearchHandler />
<KMenu type="main" context={{ type: "articles" }} /> <KMenu type="main" context={{ type: "articles" }} />
<Grid> <Grid>
{articles?.map((doc) => ( {articles?.map((doc, i) => (
<ResourceCard <ResourceCard
key={doc.name || i}
sublink="articles" sublink="articles"
res={doc} res={doc}
/> />

View File

@@ -12,14 +12,12 @@ export default function Home(props: PageProps) {
<RedirectSearchHandler /> <RedirectSearchHandler />
<KMenu type="main" context={false} /> <KMenu type="main" context={false} />
<MainLayout url={props.url}> <MainLayout url={props.url}>
<h1 class="text-4xl mb-4 mt-3 text-white flex gap-2">
<img src="/favicon.png" class="w-8 h-8 inline" />
Resources
</h1>
<div class="flex flex-wrap items-center gap-4"> <div class="flex flex-wrap items-center gap-4">
{Object.values(resources).filter((v) => v.link !== "/").map((m) => { {Object.values(resources).filter((v) => v.link !== "/").map((m) => {
return ( return (
<Card <Card
splotch
key={m.link}
title={`${m.name}`} title={`${m.name}`}
backgroundSize={80} backgroundSize={80}
image={`${ image={`${

View File

@@ -52,7 +52,7 @@ export default async function MovieIndex(
</header> </header>
<Grid> <Grid>
{movies?.map((doc, i) => { {movies?.map((doc, i) => {
return <ResourceCard key={i} res={doc} />; return <ResourceCard key={doc.name || i} res={doc} />;
})} })}
</Grid> </Grid>
</MainLayout> </MainLayout>

View File

@@ -50,7 +50,9 @@ export default function Greet(
</header> </header>
<Grid> <Grid>
{series?.map((doc, i) => { {series?.map((doc, i) => {
return <ResourceCard key={i} sublink="series" res={doc} />; return (
<ResourceCard key={doc.name || i} sublink="series" res={doc} />
);
})} })}
</Grid> </Grid>
</MainLayout> </MainLayout>

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

BIN
static/emojis/home_icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

BIN
static/splotch_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 394 KiB

BIN
static/splotch_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 407 KiB

BIN
static/splotch_3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 390 KiB