diff --git a/Dockerfile b/Dockerfile index 404a01b..c8e965e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,25 @@ FROM denoland/deno:2.1.4 AS build +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl && \ + deno run -A npm:playwright install --with-deps firefox &&\ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + WORKDIR /app +COPY deno.json /app/ + COPY . . ENV DATA_DIR=/app/data -RUN apt-get update && apt install -y curl && \ - deno run -A npm:playwright install --with-deps firefox && \ - deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts && \ +RUN mkdir -p $DATA_DIR && \ + deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts &&\ sed -i -e 's/"deno"/"no-deno"/' node_modules/@libsql/client/package.json &&\ - mkdir -p $DATA_DIR &&\ deno task build - EXPOSE 8000 -# Start the application CMD ["run", "-A", "main.ts"] diff --git a/islands/IngredientsList.tsx b/islands/IngredientsList.tsx index d889232..846313a 100644 --- a/islands/IngredientsList.tsx +++ b/islands/IngredientsList.tsx @@ -1,13 +1,23 @@ import { Signal } from "@preact/signals"; import type { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts"; import { FunctionalComponent } from "preact"; +import { unitsOfMeasure } from "@lib/parseIngredient.ts"; -function numberToString(num: number) { +function formatAmount(num: number) { + if (num === 0) return ""; return (Math.floor(num * 4) / 4).toString(); } -function stringToNumber(str: string) { - return parseFloat(str); +function formatUnit(unit: string, amount: number) { + const unitKey = unit.toLowerCase() as keyof typeof unitsOfMeasure; + if (unitKey in unitsOfMeasure) { + if (amount > 1 && unitsOfMeasure[unitKey].plural !== undefined) { + return unitsOfMeasure[unitKey].plural; + } + return unitsOfMeasure[unitKey].short; + } else { + return unit; + } } const Ingredient = ( @@ -20,7 +30,7 @@ const Ingredient = ( ) => { const { name, quantity, unit } = ingredient; - const parsedQuantity = stringToNumber(quantity); + const parsedQuantity = parseFloat(quantity); const finalAmount = (typeof parsedQuantity === "number" && amount) ? (parsedQuantity / portion) * (amount?.value || 1) @@ -29,8 +39,10 @@ const Ingredient = ( return ( - {numberToString(finalAmount || 0) + - (typeof unit === "string" ? unit : "")} + {formatAmount(finalAmount || 0)} + + {formatUnit(unit, finalAmount || 0)} + {name} diff --git a/lib/documents.ts b/lib/documents.ts index d665bf8..8a3fab1 100644 --- a/lib/documents.ts +++ b/lib/documents.ts @@ -109,7 +109,7 @@ export async function getDocument(name: string): Promise { export function updateDocument(name: string, content: string) { return db.update(documentTable).set({ content, - }).where(eq(documentTable.name, name)); + }).where(eq(documentTable.name, name)).run(); } export function transformDocument(input: string, cb: (r: Root) => Root) { diff --git a/lib/openai.ts b/lib/openai.ts index 2a8a9e6..b845ec6 100644 --- a/lib/openai.ts +++ b/lib/openai.ts @@ -3,7 +3,7 @@ import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zo import { OPENAI_API_KEY } from "@lib/env.ts"; import { hashString } from "@lib/helpers.ts"; import { createCache } from "@lib/cache.ts"; -import recipeSchema from "@lib/recipeSchema.ts"; +import recipeSchema, { recipeResponseSchema } from "@lib/recipeSchema.ts"; const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY }); @@ -223,7 +223,7 @@ export async function extractRecipe(content: string) { }, { role: "user", content }, ], - response_format: zodResponseFormat(recipeSchema, "recipe-v2"), + response_format: zodResponseFormat(recipeResponseSchema, "recipe-v2"), }); return recipeSchema.parse(completion.choices[0].message.parsed); diff --git a/lib/parseIngredient.ts b/lib/parseIngredient.ts index dc7c188..83d32a0 100644 --- a/lib/parseIngredient.ts +++ b/lib/parseIngredient.ts @@ -1,35 +1,94 @@ -import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1"; +import { + parseIngredient, + unitsOfMeasure as _unitsOfMeasure, +} from "https://esm.sh/parse-ingredient@1.2.1"; +import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts"; +import { removeMarkdownFormatting } from "@lib/string.ts"; -export function parseIngredient(text: string) { - const ing = _parseIngredient(text, { - additionalUOMs: { - tableSpoon: { - short: "EL", - plural: "Table Spoons", - alternates: ["el", "EL", "Tbsp", "tbsp"], - }, - teaSpoon: { - short: "TL", - plural: "Tea Spoon", - alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"], - }, - litre: { - short: "L", - plural: "liters", - alternates: ["L", "l"], - }, - paket: { - short: "Paket", - plural: "Pakets", - alternates: ["Paket", "paket"], - }, - }, +const customUnits = { + tableSpoon: { + short: "EL", + plural: "Table Spoons", + alternates: ["el", "EL", "Tbsp", "tbsp"], + }, + dose: { + short: "Dose", + plural: "Dosen", + alternates: ["Dose", "dose", "Dose(n)"], + }, + pound: { + short: "lb", + plural: "pounds", + alternates: ["lb", "lbs", "pound", "pounds"], + }, + teaSpoon: { + short: "TL", + plural: "Tea Spoon", + alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"], + }, + litre: { + short: "L", + plural: "liters", + alternates: ["L", "l"], + }, + paket: { + short: "Paket", + plural: "Pakets", + alternates: ["Paket", "paket"], + }, +}; + +export const unitsOfMeasure = { + ..._unitsOfMeasure, + ...customUnits, +} as const; + +export function parseIngredients( + text: string, +): (Ingredient | IngredientGroup)[] { + const cleanText = removeMarkdownFormatting(text); + + const ingredients = parseIngredient(cleanText, { + normalizeUOM: true, + additionalUOMs: customUnits, }); - return { - name: ing[0].description, - unit: ing[0].unitOfMeasure || "", - quantity: ing[0].quantity?.toString() || "", - note: "", - }; + const results: (Ingredient | IngredientGroup)[] = []; + let currentGroup: IngredientGroup | undefined; + + for (const ing of ingredients) { + if (ing.isGroupHeader) { + if (currentGroup) { + results.push(currentGroup); + } + currentGroup = { + name: ing.description.replace(/:$/, ""), + items: [], + }; + } else { + const ingredient = { + name: ing.description.replace(/^\s?-/, "").trim(), + unit: ing.unitOfMeasure || "", + quantity: ing.quantity?.toString() || ing.quantity2?.toString() || "", + note: "", + }; + + const unit = ingredient.unit.toLowerCase() as keyof typeof unitsOfMeasure; + if (unit in unitsOfMeasure && unit !== "cup") { + ingredient.unit = unitsOfMeasure[unit].short; + } + + if (!currentGroup) { + results.push(ingredient); + } else { + currentGroup.items.push(ingredient); + } + } + } + + if (currentGroup) { + results.push(currentGroup); + } + + return results; } diff --git a/lib/playwright.ts b/lib/playwright.ts index 4df57ff..e39651a 100644 --- a/lib/playwright.ts +++ b/lib/playwright.ts @@ -16,6 +16,7 @@ export async function fetchHtmlWithPlaywright( streamResponse: ReturnType, ): Promise { streamResponse.enqueue("booting up playwright"); + // Launch the Playwright browser const browser = await firefox.launch(); diff --git a/lib/recipeSchema.ts b/lib/recipeSchema.ts index cc768fa..6f195c7 100644 --- a/lib/recipeSchema.ts +++ b/lib/recipeSchema.ts @@ -36,4 +36,12 @@ const recipeSchema = z.object({ notes: z.array(z.string()).describe("Optional notes about the recipe"), }); +const noRecipeSchema = z.object({ + errorMessages: z.array(z.string()).describe( + "List of error messages, if no recipe was found", + ), +}); + +export const recipeResponseSchema = z.union([recipeSchema, noRecipeSchema]); + export default recipeSchema; diff --git a/lib/resource/recipes.ts b/lib/resource/recipes.ts index bf60cfd..b4b9724 100644 --- a/lib/resource/recipes.ts +++ b/lib/resource/recipes.ts @@ -1,6 +1,5 @@ import { type DocumentChild, - getTextOfChild, getTextOfRange, parseDocument, } from "@lib/documents.ts"; @@ -9,7 +8,7 @@ import { createCrud } from "@lib/crud.ts"; import { extractHashTags } from "@lib/string.ts"; import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts"; import { fixRenderedMarkdown } from "@lib/helpers.ts"; -import { parseIngredient } from "@lib/parseIngredient.ts"; +import { parseIngredients } from "@lib/parseIngredient.ts"; export type Recipe = { type: "recipe"; @@ -33,72 +32,14 @@ export type Recipe = { }; }; -function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined { - if (listItem.type === "listItem") { - const children: DocumentChild[] = listItem.children[0]?.children || - listItem.children; - - const text = children.map((c) => getTextOfChild(c)).join(" ").trim(); - - return parseIngredient(text); - } -} - -const isIngredient = (item: Ingredient | undefined): item is Ingredient => { - return !!item; -}; - -function parseIngredientsList(list: DocumentChild): Ingredient[] { - if (list.type === "list" && "children" in list) { - return list.children.map((listItem) => { - return parseIngredientItem(listItem); - }).filter(isIngredient); - } - return []; -} - -function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] { - const ingredients: (Ingredient | IngredientGroup)[] = []; - if (!children) return []; - let skip = false; - for (let i = 0; i < children.length; i++) { - if (skip) { - skip = false; - continue; - } - const child = children[i]; - - if (child.type === "paragraph") { - const nextChild = children[i + 1]; - - if (!nextChild || nextChild.type !== "list") continue; - - const name = getTextOfChild(child); - ingredients.push({ - name: name || "", - items: parseIngredientsList(nextChild), - }); - skip = true; - continue; - } - - if (child.type === "list") { - ingredients.push(...parseIngredientsList(child)); - } - } - - return ingredients; -} - function extractSteps( content: string, seperator: RegExp = /\n(?=\d+\.)/g, ): string[] { const steps = content.split(seperator).map((step) => { const match = step.match(/^(\d+)\.\s*(.*)/); - if (!match) return; - const [, , text] = match; - return text; + if (match) return match[2]; + return step; }).filter((step) => !!step); return steps as string[]; } @@ -141,7 +82,14 @@ export function parseRecipe(original: string, id: string): Recipe { let description = getTextOfRange(groups[0], original); - const ingredients = parseIngredients(groups[1]); + let ingredientsText = getTextOfRange(groups[1], original); + if (ingredientsText) { + ingredientsText = ingredientsText.replace(/#+\s?Ingredients?/, ""); + } else { + ingredientsText = ""; + } + + const ingredients = parseIngredients(ingredientsText); const instructionText = getTextOfRange(groups[2], original); let instructions = extractSteps(instructionText || ""); diff --git a/lib/search.ts b/lib/search.ts index c2d877e..b00b95d 100644 --- a/lib/search.ts +++ b/lib/search.ts @@ -55,8 +55,6 @@ const isResource = ( export async function searchResource( { q, tags = [], types, authors, rating }: SearchParams, ): Promise { - console.log("searchResource", { q, tags, types, authors, rating }); - let resources = (await Promise.all([ (!types || types.includes("movie")) && getAllMovies(), (!types || types.includes("series")) && getAllSeries(), diff --git a/lib/string.ts b/lib/string.ts index 8111ced..e562a47 100644 --- a/lib/string.ts +++ b/lib/string.ts @@ -133,3 +133,41 @@ export function parseTimeCacheKey(key: string) { export function rgbToHex(r: number, g: number, b: number) { return "#" + componentToHex(r) + componentToHex(g) + componentToHex(b); } + +export function removeMarkdownFormatting(text: string): string { + // Remove code blocks + text = text.replace(/```[\s\S]*?```/g, ""); + + // Remove inline code + text = text.replace(/`([^`]+)`/g, "$1"); + + // Remove images + text = text.replace(/!\[.*?\]\(.*?\)/g, ""); + + // Remove links + text = text.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1"); + + // Remove bold and italic formatting + text = text.replace(/(\*\*|__)(.*?)\1/g, "$2"); // Bold + text = text.replace(/(\*|_)(.*?)\1/g, "$2"); // Italic + + // Remove strikethrough + text = text.replace(/~~(.*?)~~/g, "$1"); + + // Remove headings + text = text.replace(/^#{1,6}\s*(.+)$/gm, "$1"); + + // Remove blockquotes + text = text.replace(/^>\s*/gm, ""); + + // Remove unordered list markers + text = text.replace(/^[-*+]\s+/gm, "-"); + + // Remove ordered list markers + text = text.replace(/^\d+\.\s+/gm, ""); + + // Remove horizontal rules + text = text.replace(/^---+$/gm, ""); + + return text; +} diff --git a/routes/api/recipes/create/index.ts b/routes/api/recipes/create/index.ts index e42862e..4a3e646 100644 --- a/routes/api/recipes/create/index.ts +++ b/routes/api/recipes/create/index.ts @@ -90,14 +90,8 @@ async function extractUsingAI( const markdown = service.turndown(cleanDocument); streamResponse.enqueue("extracting recipe with openai"); - console.log("------- MARKDOWN ------"); - console.log(markdown); - console.log("-----------------------"); const recipe = await openai.extractRecipe(markdown); - console.log("------- EXTRACTED ------"); - console.log(JSON.stringify(recipe, null, 2)); - console.log("-----------------------"); return recipe; } @@ -142,7 +136,6 @@ async function processCreateRecipeFromUrl( let recipe: z.infer | undefined = undefined; if (jsonLds.length > 0) { for (const jsonLd of jsonLds) { - console.log({ content: jsonLd.textContent }); recipe = parseJsonLdToRecipeSchema(jsonLd.textContent || ""); if (recipe) break; } @@ -152,7 +145,7 @@ async function processCreateRecipeFromUrl( recipe = await extractUsingAI(url, document, streamResponse); } - const id = (recipe?.title || title || "").replaceAll(" ", "-"); + const id = (recipe?.title || title || "").replace(/--+/, "-"); if (!recipe) { streamResponse.enqueue("failed to parse recipe"); @@ -226,10 +219,6 @@ async function processCreateRecipeFromUrl( streamResponse.enqueue("finished processing, creating file"); - console.log("------- CREATING ------"); - console.log(JSON.stringify(recipe, null, 2)); - console.log("-----------------------"); - await createRecipe(newRecipe.id, newRecipe); streamResponse.enqueue("id: " + newRecipe.id); @@ -254,6 +243,7 @@ export const handler: Handlers = { processCreateRecipeFromUrl({ fetchUrl, streamResponse }).then((article) => { log.debug("created article from link", { article }); }).catch((err) => { + streamResponse.enqueue(`error creating article: ${err}`); log.error(err); }).finally(() => { streamResponse.cancel(); diff --git a/routes/api/recipes/create/parseJsonLd.ts b/routes/api/recipes/create/parseJsonLd.ts index 91cd6e1..dba2e71 100644 --- a/routes/api/recipes/create/parseJsonLd.ts +++ b/routes/api/recipes/create/parseJsonLd.ts @@ -1,5 +1,5 @@ import recipeSchema from "@lib/recipeSchema.ts"; -import { parseIngredient } from "@lib/parseIngredient.ts"; +import { parseIngredients } from "@lib/parseIngredient.ts"; export function parseJsonLdToRecipeSchema(jsonLdContent: string) { try { @@ -20,8 +20,8 @@ export function parseJsonLdToRecipeSchema(jsonLdContent: string) { } // Map and parse ingredients into the new schema - const ingredients = (data.recipeIngredient || []).map( - parseIngredient, + const ingredients = parseIngredients( + data?.recipeIngredient?.join("\n") || "", ); const instructions = Array.isArray(data.recipeInstructions) diff --git a/routes/recipes/[name].tsx b/routes/recipes/[name].tsx index 85562a6..5ecfad3 100644 --- a/routes/recipes/[name].tsx +++ b/routes/recipes/[name].tsx @@ -46,17 +46,19 @@ function ValidRecipe({ portion={portion} />

Preparation

-
    - {recipe.instructions && (recipe.instructions.map((instruction) => { - return ( -
  1. - ); - }))} -
+
+
    + {recipe.instructions && (recipe.instructions.map((instruction) => { + return ( +
  1. + ); + }))} +
+
); } diff --git a/static/global.css b/static/global.css index 78cc14a..7c34b1c 100644 --- a/static/global.css +++ b/static/global.css @@ -116,3 +116,7 @@ input[type=number] { .highlight>pre { text-wrap: wrap; } + +.list-decimal li::marker { + color: #8a898c; +}