feat: url scraper to recipe

This commit is contained in:
2025-01-18 00:46:05 +01:00
parent 6112d007c2
commit d4cccacc28
24 changed files with 1349 additions and 137 deletions

View File

@ -4,31 +4,22 @@ import {
getTextOfRange,
parseDocument,
} from "@lib/documents.ts";
import { parse } from "yaml";
import { parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
import { parse, stringify } from "yaml";
import { createCrud } from "@lib/crud.ts";
import { extractHashTags } from "@lib/string.ts";
// NOTE(review): this file also imports `Ingredient` and `IngredientGroup`
// from "@lib/recipeSchema.ts"; confirm whether these local declarations are
// still needed alongside that import.

/** A named list of ingredients. */
export type IngredientGroup = {
name: string;
ingredients: Ingredient[];
};
/**
 * A single parsed ingredient line.
 *
 * `parseIngredientItem` fills `type` from the parser's `description`, `unit`
 * from its `unitOfMeasure` and `amount` from its `quantity`.
 */
export type Ingredient = {
type: string;
unit?: string;
amount?: string;
};
/** A mixed list of standalone ingredients and ingredient groups. */
export type Ingredients = (Ingredient | IngredientGroup)[];
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
import { fixRenderedMarkdown } from "@lib/helpers.ts";
import { parseIngredient } from "@lib/parseIngredient.ts";
export type Recipe = {
type: "recipe";
id: string;
name: string;
description?: string;
ingredients: Ingredients;
preparation?: string;
markdown?: string;
ingredients: (Ingredient | IngredientGroup)[];
instructions?: string[];
notes?: string[];
tags: string[];
meta?: {
time?: string;
@ -49,38 +40,8 @@ function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined {
const text = children.map((c) => getTextOfChild(c)).join(" ").trim();
const ing = parseIngredient(text, {
additionalUOMs: {
tableSpoon: {
short: "EL",
plural: "Table Spoons",
alternates: ["el", "EL", "Tbsp", "tbsp"],
},
teaSpoon: {
short: "TL",
plural: "Tea Spoon",
alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
},
litre: {
short: "L",
plural: "liters",
alternates: ["L", "l"],
},
paket: {
short: "Paket",
plural: "Pakets",
alternates: ["Paket", "paket"],
},
},
});
return {
type: ing[0].description,
unit: ing[0].unitOfMeasure,
amount: ing[0].quantity,
};
return parseIngredient(text);
}
return;
}
const isIngredient = (item: Ingredient | undefined): item is Ingredient => {
@ -112,9 +73,10 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
if (!nextChild || nextChild.type !== "list") continue;
const name = getTextOfChild(child);
ingredients.push({
name: getTextOfChild(child) || "",
ingredients: parseIngredientsList(nextChild),
name: name || "",
items: parseIngredientsList(nextChild),
});
skip = true;
continue;
@ -128,6 +90,19 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
return ingredients;
}
/**
 * Splits a block of instruction text into individual numbered steps.
 *
 * The text is first cut into chunks with `seperator` (by default at every
 * newline that is directly followed by `<digits>.`). A chunk only counts as a
 * step when it begins with `<digits>.`; that numbering is stripped and the
 * remaining text is returned. Chunks without a leading number, and steps
 * whose text is empty, are dropped.
 *
 * @param content Raw instruction text.
 * @param seperator Pattern used to split `content` into candidate steps.
 * @returns The step texts in document order, without their numbering.
 */
function extractSteps(
  content: string,
  seperator: RegExp = /\n(?=\d+\.)/g,
): string[] {
  const steps: string[] = [];
  for (const chunk of content.split(seperator)) {
    // `[\s\S]*` rather than `.*`: with the default separator a chunk can span
    // several lines, and `.` stops at the first line terminator, which would
    // silently drop every continuation line of the step.
    const match = /^\d+\.\s*([\s\S]*)/.exec(chunk);
    const text = match?.[1]?.trim();
    if (text) steps.push(text);
  }
  return steps;
}
export function parseRecipe(original: string, id: string): Recipe {
const doc = parseDocument(original);
@ -140,8 +115,8 @@ export function parseRecipe(original: string, id: string): Recipe {
if (child.type === "yaml") {
try {
meta = parse(child.value) as Recipe["meta"];
} catch (_) {
// console.log("Error parsing YAML", err);
} catch (err) {
console.log("Error parsing YAML", err);
}
continue;
}
@ -168,7 +143,14 @@ export function parseRecipe(original: string, id: string): Recipe {
const ingredients = parseIngredients(groups[1]);
const preparation = getTextOfRange(groups[2], original);
const instructionText = getTextOfRange(groups[2], original);
let instructions = extractSteps(instructionText || "");
if (instructions.length <= 1) {
const d = extractSteps(instructionText || "", /\n/g);
if (d.length > instructions.length) {
instructions = d;
}
}
const tags = extractHashTags(description || "");
if (description) {
@ -183,15 +165,88 @@ export function parseRecipe(original: string, id: string): Recipe {
meta,
name,
tags,
markdown: original,
notes: getTextOfRange(groups[3], original)?.split("\n"),
description,
ingredients,
preparation,
instructions,
};
}
/**
 * Returns a shallow copy of `obj` without the entries whose value is
 * `undefined`. Other falsy values (`null`, `0`, `""`, `false`) are kept, and
 * the input object is not modified.
 */
function filterUndefinedFromObject<T extends { [key: string]: unknown }>(
  obj: T,
) {
  const definedEntries: [string, unknown][] = [];
  for (const entry of Object.entries(obj)) {
    if (entry[1] !== undefined) {
      definedEntries.push(entry);
    }
  }
  return Object.fromEntries(definedEntries);
}
/**
 * Renders a recipe as a markdown document.
 *
 * The document is assembled line by line in this order: YAML frontmatter
 * built from `recipe.meta`, the title, an optional image, the tags as
 * hashtags, the description, a `---` rule, the ingredients section, the
 * numbered instructions and finally the notes. The assembled text is passed
 * through `fixRenderedMarkdown` before it is returned.
 *
 * @param recipe The recipe to render.
 * @returns The result of `fixRenderedMarkdown` for the assembled document.
 */
export function renderRecipe(recipe: Recipe) {
  // A standalone ingredient, i.e. any entry that is not a group with `items`.
  type PlainIngredient = Exclude<
    Recipe["ingredients"][number],
    { items: unknown }
  >;

  const meta = filterUndefinedFromObject(recipe.meta || {});

  // `thumbnail` and `average` are removed so they never reach the frontmatter.
  delete meta.thumbnail;
  delete meta.average;

  const recipeImage = meta.image ? `![](${meta.image})` : "";

  // Formats one ingredient as a list item. The amount is whichever of
  // quantity and unit is present; previously an ingredient with only one of
  // the two was rendered as the bare name, losing the amount on write.
  const renderIngredient = (ing: PlainIngredient): string => {
    const amount = `${ing.quantity?.trim() ?? ""}${ing.unit?.trim() ?? ""}`;
    return amount ? `- **${amount}** ${ing.name}` : `- ${ing.name}`;
  };

  // Groups get an italic heading line followed by their own list.
  const ingredients = recipe.ingredients
    .map((item) =>
      "items" in item
        ? `\n*${item.name}*\n${item.items.map(renderIngredient).join("\n")}`
        : renderIngredient(item)
    )
    .join("\n");

  // Instructions are rendered as a numbered list starting at 1.
  const instructions = recipe.instructions
    ? recipe.instructions.map((step, i) => `${i + 1}. ${step}`).join("\n")
    : "";

  // An empty frontmatter block is still emitted when there is no metadata.
  const frontmatter = Object.keys(meta).length
    ? `---\n${stringify(meta)}\n---`
    : "---\n---";

  // Spaces inside a tag are replaced with dashes.
  const tags = recipe.tags
    .map((t) => `#${t.replaceAll(" ", "-")}`)
    .join(" ");

  const ingredientsSection = ingredients
    ? `## Ingredients\n\n${ingredients}\n\n---\n`
    : "";
  const instructionsSection = instructions ? `${instructions}\n\n---` : "";
  const notesSection = recipe.notes?.length
    ? `\n${recipe.notes.join("\n")}`
    : "";

  // Joined with explicit newlines so the output does not depend on how this
  // source file is indented; the trailing "" yields the final newline.
  return fixRenderedMarkdown(
    [
      frontmatter,
      `# ${recipe.name}`,
      recipeImage,
      tags,
      recipe.description || "",
      "---",
      ingredientsSection,
      instructionsSection,
      notesSection,
      "",
    ].join("\n"),
  );
}
// CRUD helper for recipes, built by `createCrud` with `parseRecipe` and
// `renderRecipe` as its parse/render functions.
// NOTE(review): `prefix` presumably is the storage path prefix for recipe
// documents and `hasThumbnails` presumably enables thumbnail handling;
// confirm against `createCrud` in "@lib/crud.ts".
const crud = createCrud<Recipe>({
prefix: `Recipes/`,
parse: parseRecipe,
render: renderRecipe,
hasThumbnails: true,
});