refactor: simplify parse ingredients code
This commit is contained in:
parent
f106460502
commit
78e94ccf82
16
Dockerfile
16
Dockerfile
@ -1,21 +1,25 @@
|
||||
FROM denoland/deno:2.1.4 AS build
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl && \
|
||||
deno run -A npm:playwright install --with-deps firefox &&\
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY deno.json /app/
|
||||
|
||||
COPY . .
|
||||
|
||||
ENV DATA_DIR=/app/data
|
||||
|
||||
RUN apt-get update && apt install -y curl && \
|
||||
deno run -A npm:playwright install --with-deps firefox && \
|
||||
deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts && \
|
||||
RUN mkdir -p $DATA_DIR && \
|
||||
deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts &&\
|
||||
sed -i -e 's/"deno"/"no-deno"/' node_modules/@libsql/client/package.json &&\
|
||||
mkdir -p $DATA_DIR &&\
|
||||
deno task build
|
||||
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
# Start the application
|
||||
CMD ["run", "-A", "main.ts"]
|
||||
|
||||
|
@ -1,13 +1,23 @@
|
||||
import { Signal } from "@preact/signals";
|
||||
import type { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { FunctionalComponent } from "preact";
|
||||
import { unitsOfMeasure } from "@lib/parseIngredient.ts";
|
||||
|
||||
function numberToString(num: number) {
|
||||
/**
 * Formats an ingredient amount for display, snapped down to quarter steps.
 *
 * @param num - Amount to format.
 * @returns An empty string when `num` is exactly `0`; otherwise the amount
 *   floored to a multiple of 0.25, rendered as a decimal string.
 */
function formatAmount(num: number) {
  if (num === 0) return "";

  // Scaled amounts can land a hair below a quarter step because of
  // floating-point error (0.7 / 7 * 10 === 0.9999999999999999). A tiny
  // tolerance keeps such values from being floored a whole step down.
  const tolerance = 1e-9;
  const quarters = Math.floor(num * 4 + tolerance);

  return (quarters / 4).toString();
}
|
||||
|
||||
function stringToNumber(str: string) {
|
||||
return parseFloat(str);
|
||||
/**
 * Resolves the label shown next to an ingredient amount.
 *
 * The unit is lower-cased and looked up in `unitsOfMeasure`. A known unit
 * yields its `plural` label when `amount` is greater than 1 and a plural is
 * defined, otherwise its `short` label. An unknown unit is returned as given.
 *
 * @param unit - Unit text stored on the ingredient.
 * @param amount - Amount the unit belongs to; only used to choose the plural.
 * @returns The label to display.
 */
function formatUnit(unit: string, amount: number) {
  // Defensive: yield no label instead of throwing on `toLowerCase` if a
  // non-string value slips through at runtime.
  if (typeof unit !== "string") return "";

  const unitKey = unit.toLowerCase() as keyof typeof unitsOfMeasure;

  // Own-property check: the `in` operator also matches inherited keys such as
  // "constructor", which would make the lookup below return `undefined`.
  if (!Object.prototype.hasOwnProperty.call(unitsOfMeasure, unitKey)) {
    return unit;
  }

  const { short, plural } = unitsOfMeasure[unitKey];
  if (amount > 1 && plural !== undefined) {
    return plural;
  }

  // Fall back to the raw unit if an entry has no short label.
  return short ?? unit;
}
|
||||
|
||||
const Ingredient = (
|
||||
@ -20,7 +30,7 @@ const Ingredient = (
|
||||
) => {
|
||||
const { name, quantity, unit } = ingredient;
|
||||
|
||||
const parsedQuantity = stringToNumber(quantity);
|
||||
const parsedQuantity = parseFloat(quantity);
|
||||
|
||||
const finalAmount = (typeof parsedQuantity === "number" && amount)
|
||||
? (parsedQuantity / portion) * (amount?.value || 1)
|
||||
@ -29,8 +39,10 @@ const Ingredient = (
|
||||
return (
|
||||
<tr key={key}>
|
||||
<td class="pr-4 py-2">
|
||||
{numberToString(finalAmount || 0) +
|
||||
(typeof unit === "string" ? unit : "")}
|
||||
{formatAmount(finalAmount || 0)}
|
||||
<span class="ml-0.5 opacity-50">
|
||||
{formatUnit(unit, finalAmount || 0)}
|
||||
</span>
|
||||
</td>
|
||||
<td class="px-4 py-2">{name}</td>
|
||||
</tr>
|
||||
|
@ -109,7 +109,7 @@ export async function getDocument(name: string): Promise<string | undefined> {
|
||||
export function updateDocument(name: string, content: string) {
|
||||
return db.update(documentTable).set({
|
||||
content,
|
||||
}).where(eq(documentTable.name, name));
|
||||
}).where(eq(documentTable.name, name)).run();
|
||||
}
|
||||
|
||||
export function transformDocument(input: string, cb: (r: Root) => Root) {
|
||||
|
@ -3,7 +3,7 @@ import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zo
|
||||
import { OPENAI_API_KEY } from "@lib/env.ts";
|
||||
import { hashString } from "@lib/helpers.ts";
|
||||
import { createCache } from "@lib/cache.ts";
|
||||
import recipeSchema from "@lib/recipeSchema.ts";
|
||||
import recipeSchema, { recipeResponseSchema } from "@lib/recipeSchema.ts";
|
||||
|
||||
const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY });
|
||||
|
||||
@ -223,7 +223,7 @@ export async function extractRecipe(content: string) {
|
||||
},
|
||||
{ role: "user", content },
|
||||
],
|
||||
response_format: zodResponseFormat(recipeSchema, "recipe-v2"),
|
||||
response_format: zodResponseFormat(recipeResponseSchema, "recipe-v2"),
|
||||
});
|
||||
|
||||
return recipeSchema.parse(completion.choices[0].message.parsed);
|
||||
|
@ -1,13 +1,26 @@
|
||||
import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
|
||||
import {
|
||||
parseIngredient,
|
||||
unitsOfMeasure as _unitsOfMeasure,
|
||||
} from "https://esm.sh/parse-ingredient@1.2.1";
|
||||
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { removeMarkdownFormatting } from "@lib/string.ts";
|
||||
|
||||
export function parseIngredient(text: string) {
|
||||
const ing = _parseIngredient(text, {
|
||||
additionalUOMs: {
|
||||
const customUnits = {
|
||||
tableSpoon: {
|
||||
short: "EL",
|
||||
plural: "Table Spoons",
|
||||
alternates: ["el", "EL", "Tbsp", "tbsp"],
|
||||
},
|
||||
dose: {
|
||||
short: "Dose",
|
||||
plural: "Dosen",
|
||||
alternates: ["Dose", "dose", "Dose(n)"],
|
||||
},
|
||||
pound: {
|
||||
short: "lb",
|
||||
plural: "pounds",
|
||||
alternates: ["lb", "lbs", "pound", "pounds"],
|
||||
},
|
||||
teaSpoon: {
|
||||
short: "TL",
|
||||
plural: "Tea Spoon",
|
||||
@ -23,13 +36,59 @@ export function parseIngredient(text: string) {
|
||||
plural: "Pakets",
|
||||
alternates: ["Paket", "paket"],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Unit lookup table: the entries exported by parse-ingredient with
 * `customUnits` spread on top, so a custom entry replaces a library entry
 * that has the same key.
 */
export const unitsOfMeasure = {
|
||||
..._unitsOfMeasure,
|
||||
...customUnits,
|
||||
} as const;
|
||||
|
||||
export function parseIngredients(
|
||||
text: string,
|
||||
): (Ingredient | IngredientGroup)[] {
|
||||
const cleanText = removeMarkdownFormatting(text);
|
||||
|
||||
const ingredients = parseIngredient(cleanText, {
|
||||
normalizeUOM: true,
|
||||
additionalUOMs: customUnits,
|
||||
});
|
||||
|
||||
return {
|
||||
name: ing[0].description,
|
||||
unit: ing[0].unitOfMeasure || "",
|
||||
quantity: ing[0].quantity?.toString() || "",
|
||||
const results: (Ingredient | IngredientGroup)[] = [];
|
||||
let currentGroup: IngredientGroup | undefined;
|
||||
|
||||
for (const ing of ingredients) {
|
||||
if (ing.isGroupHeader) {
|
||||
if (currentGroup) {
|
||||
results.push(currentGroup);
|
||||
}
|
||||
currentGroup = {
|
||||
name: ing.description.replace(/:$/, ""),
|
||||
items: [],
|
||||
};
|
||||
} else {
|
||||
const ingredient = {
|
||||
name: ing.description.replace(/^\s?-/, "").trim(),
|
||||
unit: ing.unitOfMeasure || "",
|
||||
quantity: ing.quantity?.toString() || ing.quantity2?.toString() || "",
|
||||
note: "",
|
||||
};
|
||||
|
||||
const unit = ingredient.unit.toLowerCase() as keyof typeof unitsOfMeasure;
|
||||
if (unit in unitsOfMeasure && unit !== "cup") {
|
||||
ingredient.unit = unitsOfMeasure[unit].short;
|
||||
}
|
||||
|
||||
if (!currentGroup) {
|
||||
results.push(ingredient);
|
||||
} else {
|
||||
currentGroup.items.push(ingredient);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentGroup) {
|
||||
results.push(currentGroup);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ export async function fetchHtmlWithPlaywright(
|
||||
streamResponse: ReturnType<typeof createStreamResponse>,
|
||||
): Promise<string> {
|
||||
streamResponse.enqueue("booting up playwright");
|
||||
|
||||
// Launch the Playwright browser
|
||||
const browser = await firefox.launch();
|
||||
|
||||
|
@ -36,4 +36,12 @@ const recipeSchema = z.object({
|
||||
notes: z.array(z.string()).describe("Optional notes about the recipe"),
|
||||
});
|
||||
|
||||
// Error messages reported in place of a recipe.
const noRecipeErrorMessages = z
  .array(z.string())
  .describe("List of error messages, if no recipe was found");

// Response shape for the case where no recipe was found.
const noRecipeSchema = z.object({ errorMessages: noRecipeErrorMessages });
|
||||
|
||||
export const recipeResponseSchema = z.union([recipeSchema, noRecipeSchema]);
|
||||
|
||||
export default recipeSchema;
|
||||
|
@ -1,6 +1,5 @@
|
||||
import {
|
||||
type DocumentChild,
|
||||
getTextOfChild,
|
||||
getTextOfRange,
|
||||
parseDocument,
|
||||
} from "@lib/documents.ts";
|
||||
@ -9,7 +8,7 @@ import { createCrud } from "@lib/crud.ts";
|
||||
import { extractHashTags } from "@lib/string.ts";
|
||||
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { fixRenderedMarkdown } from "@lib/helpers.ts";
|
||||
import { parseIngredient } from "@lib/parseIngredient.ts";
|
||||
import { parseIngredients } from "@lib/parseIngredient.ts";
|
||||
|
||||
export type Recipe = {
|
||||
type: "recipe";
|
||||
@ -33,72 +32,14 @@ export type Recipe = {
|
||||
};
|
||||
};
|
||||
|
||||
function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined {
|
||||
if (listItem.type === "listItem") {
|
||||
const children: DocumentChild[] = listItem.children[0]?.children ||
|
||||
listItem.children;
|
||||
|
||||
const text = children.map((c) => getTextOfChild(c)).join(" ").trim();
|
||||
|
||||
return parseIngredient(text);
|
||||
}
|
||||
}
|
||||
|
||||
const isIngredient = (item: Ingredient | undefined): item is Ingredient => {
|
||||
return !!item;
|
||||
};
|
||||
|
||||
function parseIngredientsList(list: DocumentChild): Ingredient[] {
|
||||
if (list.type === "list" && "children" in list) {
|
||||
return list.children.map((listItem) => {
|
||||
return parseIngredientItem(listItem);
|
||||
}).filter(isIngredient);
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
|
||||
const ingredients: (Ingredient | IngredientGroup)[] = [];
|
||||
if (!children) return [];
|
||||
let skip = false;
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
if (skip) {
|
||||
skip = false;
|
||||
continue;
|
||||
}
|
||||
const child = children[i];
|
||||
|
||||
if (child.type === "paragraph") {
|
||||
const nextChild = children[i + 1];
|
||||
|
||||
if (!nextChild || nextChild.type !== "list") continue;
|
||||
|
||||
const name = getTextOfChild(child);
|
||||
ingredients.push({
|
||||
name: name || "",
|
||||
items: parseIngredientsList(nextChild),
|
||||
});
|
||||
skip = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (child.type === "list") {
|
||||
ingredients.push(...parseIngredientsList(child));
|
||||
}
|
||||
}
|
||||
|
||||
return ingredients;
|
||||
}
|
||||
|
||||
function extractSteps(
|
||||
content: string,
|
||||
seperator: RegExp = /\n(?=\d+\.)/g,
|
||||
): string[] {
|
||||
const steps = content.split(seperator).map((step) => {
|
||||
const match = step.match(/^(\d+)\.\s*(.*)/);
|
||||
if (!match) return;
|
||||
const [, , text] = match;
|
||||
return text;
|
||||
if (match) return match[2];
|
||||
return step;
|
||||
}).filter((step) => !!step);
|
||||
return steps as string[];
|
||||
}
|
||||
@ -141,7 +82,14 @@ export function parseRecipe(original: string, id: string): Recipe {
|
||||
|
||||
let description = getTextOfRange(groups[0], original);
|
||||
|
||||
const ingredients = parseIngredients(groups[1]);
|
||||
let ingredientsText = getTextOfRange(groups[1], original);
|
||||
if (ingredientsText) {
|
||||
ingredientsText = ingredientsText.replace(/#+\s?Ingredients?/, "");
|
||||
} else {
|
||||
ingredientsText = "";
|
||||
}
|
||||
|
||||
const ingredients = parseIngredients(ingredientsText);
|
||||
|
||||
const instructionText = getTextOfRange(groups[2], original);
|
||||
let instructions = extractSteps(instructionText || "");
|
||||
|
@ -55,8 +55,6 @@ const isResource = (
|
||||
export async function searchResource(
|
||||
{ q, tags = [], types, authors, rating }: SearchParams,
|
||||
): Promise<GenericResource[]> {
|
||||
console.log("searchResource", { q, tags, types, authors, rating });
|
||||
|
||||
let resources = (await Promise.all([
|
||||
(!types || types.includes("movie")) && getAllMovies(),
|
||||
(!types || types.includes("series")) && getAllSeries(),
|
||||
|
@ -133,3 +133,41 @@ export function parseTimeCacheKey(key: string) {
|
||||
export function rgbToHex(r: number, g: number, b: number) {
|
||||
return "#" + componentToHex(r) + componentToHex(g) + componentToHex(b);
|
||||
}
|
||||
|
||||
/**
 * Strips common Markdown syntax from a text, keeping the readable content.
 *
 * Fenced code blocks, images and horizontal rules are dropped entirely.
 * Inline code, links, emphasis, strikethrough, headings, blockquotes and
 * ordered list numbers keep their text. Unordered list bullets (`-`, `*`,
 * `+` followed by whitespace) are normalised to a bare leading `-`.
 *
 * @param text - Markdown source.
 * @returns The text with the formatting described above removed.
 */
export function removeMarkdownFormatting(text: string): string {
  // Remove code blocks
  text = text.replace(/```[\s\S]*?```/g, "");

  // Remove inline code
  text = text.replace(/`([^`]+)`/g, "$1");

  // Remove images
  text = text.replace(/!\[.*?\]\(.*?\)/g, "");

  // Remove links, keeping the link text
  text = text.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1");

  // Remove bold formatting
  text = text.replace(/(\*\*|__)(.*?)\1/g, "$2");

  // Remove italic formatting. The opening delimiter must not be followed by
  // whitespace, otherwise the `*` of a list bullet ("* 1 cup *fine* flour")
  // is taken as an emphasis opener and the bullet is swallowed.
  text = text.replace(/(\*|_)(?!\s)(.*?)\1/g, "$2");

  // Remove strikethrough
  text = text.replace(/~~(.*?)~~/g, "$1");

  // Remove headings
  text = text.replace(/^#{1,6}\s*(.+)$/gm, "$1");

  // Remove blockquotes
  text = text.replace(/^>\s*/gm, "");

  // Normalise unordered list markers to a bare "-" (marker and the
  // whitespace after it are replaced, not removed)
  text = text.replace(/^[-*+]\s+/gm, "-");

  // Remove ordered list markers
  text = text.replace(/^\d+\.\s+/gm, "");

  // Remove horizontal rules
  text = text.replace(/^---+$/gm, "");

  return text;
}
|
||||
|
@ -90,14 +90,8 @@ async function extractUsingAI(
|
||||
const markdown = service.turndown(cleanDocument);
|
||||
|
||||
streamResponse.enqueue("extracting recipe with openai");
|
||||
console.log("------- MARKDOWN ------");
|
||||
console.log(markdown);
|
||||
console.log("-----------------------");
|
||||
|
||||
const recipe = await openai.extractRecipe(markdown);
|
||||
console.log("------- EXTRACTED ------");
|
||||
console.log(JSON.stringify(recipe, null, 2));
|
||||
console.log("-----------------------");
|
||||
|
||||
return recipe;
|
||||
}
|
||||
@ -142,7 +136,6 @@ async function processCreateRecipeFromUrl(
|
||||
let recipe: z.infer<typeof recipeSchema> | undefined = undefined;
|
||||
if (jsonLds.length > 0) {
|
||||
for (const jsonLd of jsonLds) {
|
||||
console.log({ content: jsonLd.textContent });
|
||||
recipe = parseJsonLdToRecipeSchema(jsonLd.textContent || "");
|
||||
if (recipe) break;
|
||||
}
|
||||
@ -152,7 +145,7 @@ async function processCreateRecipeFromUrl(
|
||||
recipe = await extractUsingAI(url, document, streamResponse);
|
||||
}
|
||||
|
||||
const id = (recipe?.title || title || "").replaceAll(" ", "-");
|
||||
const id = (recipe?.title || title || "").replace(/--+/, "-");
|
||||
|
||||
if (!recipe) {
|
||||
streamResponse.enqueue("failed to parse recipe");
|
||||
@ -226,10 +219,6 @@ async function processCreateRecipeFromUrl(
|
||||
|
||||
streamResponse.enqueue("finished processing, creating file");
|
||||
|
||||
console.log("------- CREATING ------");
|
||||
console.log(JSON.stringify(recipe, null, 2));
|
||||
console.log("-----------------------");
|
||||
|
||||
await createRecipe(newRecipe.id, newRecipe);
|
||||
|
||||
streamResponse.enqueue("id: " + newRecipe.id);
|
||||
@ -254,6 +243,7 @@ export const handler: Handlers = {
|
||||
processCreateRecipeFromUrl({ fetchUrl, streamResponse }).then((article) => {
|
||||
log.debug("created article from link", { article });
|
||||
}).catch((err) => {
|
||||
streamResponse.enqueue(`error creating article: ${err}`);
|
||||
log.error(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
|
@ -1,5 +1,5 @@
|
||||
import recipeSchema from "@lib/recipeSchema.ts";
|
||||
import { parseIngredient } from "@lib/parseIngredient.ts";
|
||||
import { parseIngredients } from "@lib/parseIngredient.ts";
|
||||
|
||||
export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
|
||||
try {
|
||||
@ -20,8 +20,8 @@ export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
|
||||
}
|
||||
|
||||
// Map and parse ingredients into the new schema
|
||||
const ingredients = (data.recipeIngredient || []).map(
|
||||
parseIngredient,
|
||||
const ingredients = parseIngredients(
|
||||
data?.recipeIngredient?.join("\n") || "",
|
||||
);
|
||||
|
||||
const instructions = Array.isArray(data.recipeInstructions)
|
||||
|
@ -46,6 +46,7 @@ function ValidRecipe({
|
||||
portion={portion}
|
||||
/>
|
||||
<h3 class="text-3xl my-5">Preparation</h3>
|
||||
<div class="pl-2">
|
||||
<ol class="list-decimal grid gap-4">
|
||||
{recipe.instructions && (recipe.instructions.map((instruction) => {
|
||||
return (
|
||||
@ -57,6 +58,7 @@ function ValidRecipe({
|
||||
);
|
||||
}))}
|
||||
</ol>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
@ -116,3 +116,7 @@ input[type=number] {
|
||||
.highlight>pre {
|
||||
text-wrap: wrap;
|
||||
}
|
||||
|
||||
/* Colour the item markers of .list-decimal lists grey (#8a898c). */
.list-decimal li::marker {
|
||||
color: #8a898c;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user