refactor: simplify parse ingredients code
This commit is contained in:
@ -109,7 +109,7 @@ export async function getDocument(name: string): Promise<string | undefined> {
|
||||
export function updateDocument(name: string, content: string) {
|
||||
return db.update(documentTable).set({
|
||||
content,
|
||||
}).where(eq(documentTable.name, name));
|
||||
}).where(eq(documentTable.name, name)).run();
|
||||
}
|
||||
|
||||
export function transformDocument(input: string, cb: (r: Root) => Root) {
|
||||
|
@ -3,7 +3,7 @@ import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zo
|
||||
import { OPENAI_API_KEY } from "@lib/env.ts";
|
||||
import { hashString } from "@lib/helpers.ts";
|
||||
import { createCache } from "@lib/cache.ts";
|
||||
import recipeSchema from "@lib/recipeSchema.ts";
|
||||
import recipeSchema, { recipeResponseSchema } from "@lib/recipeSchema.ts";
|
||||
|
||||
const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY });
|
||||
|
||||
@ -223,7 +223,7 @@ export async function extractRecipe(content: string) {
|
||||
},
|
||||
{ role: "user", content },
|
||||
],
|
||||
response_format: zodResponseFormat(recipeSchema, "recipe-v2"),
|
||||
response_format: zodResponseFormat(recipeResponseSchema, "recipe-v2"),
|
||||
});
|
||||
|
||||
return recipeSchema.parse(completion.choices[0].message.parsed);
|
||||
|
@ -1,35 +1,94 @@
|
||||
import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
|
||||
import {
|
||||
parseIngredient,
|
||||
unitsOfMeasure as _unitsOfMeasure,
|
||||
} from "https://esm.sh/parse-ingredient@1.2.1";
|
||||
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { removeMarkdownFormatting } from "@lib/string.ts";
|
||||
|
||||
export function parseIngredient(text: string) {
|
||||
const ing = _parseIngredient(text, {
|
||||
additionalUOMs: {
|
||||
tableSpoon: {
|
||||
short: "EL",
|
||||
plural: "Table Spoons",
|
||||
alternates: ["el", "EL", "Tbsp", "tbsp"],
|
||||
},
|
||||
teaSpoon: {
|
||||
short: "TL",
|
||||
plural: "Tea Spoon",
|
||||
alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
|
||||
},
|
||||
litre: {
|
||||
short: "L",
|
||||
plural: "liters",
|
||||
alternates: ["L", "l"],
|
||||
},
|
||||
paket: {
|
||||
short: "Paket",
|
||||
plural: "Pakets",
|
||||
alternates: ["Paket", "paket"],
|
||||
},
|
||||
},
|
||||
const customUnits = {
|
||||
tableSpoon: {
|
||||
short: "EL",
|
||||
plural: "Table Spoons",
|
||||
alternates: ["el", "EL", "Tbsp", "tbsp"],
|
||||
},
|
||||
dose: {
|
||||
short: "Dose",
|
||||
plural: "Dosen",
|
||||
alternates: ["Dose", "dose", "Dose(n)"],
|
||||
},
|
||||
pound: {
|
||||
short: "lb",
|
||||
plural: "pounds",
|
||||
alternates: ["lb", "lbs", "pound", "pounds"],
|
||||
},
|
||||
teaSpoon: {
|
||||
short: "TL",
|
||||
plural: "Tea Spoon",
|
||||
alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
|
||||
},
|
||||
litre: {
|
||||
short: "L",
|
||||
plural: "liters",
|
||||
alternates: ["L", "l"],
|
||||
},
|
||||
paket: {
|
||||
short: "Paket",
|
||||
plural: "Pakets",
|
||||
alternates: ["Paket", "paket"],
|
||||
},
|
||||
};
|
||||
|
||||
export const unitsOfMeasure = {
|
||||
..._unitsOfMeasure,
|
||||
...customUnits,
|
||||
} as const;
|
||||
|
||||
export function parseIngredients(
|
||||
text: string,
|
||||
): (Ingredient | IngredientGroup)[] {
|
||||
const cleanText = removeMarkdownFormatting(text);
|
||||
|
||||
const ingredients = parseIngredient(cleanText, {
|
||||
normalizeUOM: true,
|
||||
additionalUOMs: customUnits,
|
||||
});
|
||||
|
||||
return {
|
||||
name: ing[0].description,
|
||||
unit: ing[0].unitOfMeasure || "",
|
||||
quantity: ing[0].quantity?.toString() || "",
|
||||
note: "",
|
||||
};
|
||||
const results: (Ingredient | IngredientGroup)[] = [];
|
||||
let currentGroup: IngredientGroup | undefined;
|
||||
|
||||
for (const ing of ingredients) {
|
||||
if (ing.isGroupHeader) {
|
||||
if (currentGroup) {
|
||||
results.push(currentGroup);
|
||||
}
|
||||
currentGroup = {
|
||||
name: ing.description.replace(/:$/, ""),
|
||||
items: [],
|
||||
};
|
||||
} else {
|
||||
const ingredient = {
|
||||
name: ing.description.replace(/^\s?-/, "").trim(),
|
||||
unit: ing.unitOfMeasure || "",
|
||||
quantity: ing.quantity?.toString() || ing.quantity2?.toString() || "",
|
||||
note: "",
|
||||
};
|
||||
|
||||
const unit = ingredient.unit.toLowerCase() as keyof typeof unitsOfMeasure;
|
||||
if (unit in unitsOfMeasure && unit !== "cup") {
|
||||
ingredient.unit = unitsOfMeasure[unit].short;
|
||||
}
|
||||
|
||||
if (!currentGroup) {
|
||||
results.push(ingredient);
|
||||
} else {
|
||||
currentGroup.items.push(ingredient);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentGroup) {
|
||||
results.push(currentGroup);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ export async function fetchHtmlWithPlaywright(
|
||||
streamResponse: ReturnType<typeof createStreamResponse>,
|
||||
): Promise<string> {
|
||||
streamResponse.enqueue("booting up playwright");
|
||||
|
||||
// Launch the Playwright browser
|
||||
const browser = await firefox.launch();
|
||||
|
||||
|
@ -36,4 +36,12 @@ const recipeSchema = z.object({
|
||||
notes: z.array(z.string()).describe("Optional notes about the recipe"),
|
||||
});
|
||||
|
||||
// Alternative response shape for the case where no recipe could be extracted:
// instead of recipe fields it carries only a list of error messages.
const noRecipeSchema = z.object({
|
||||
// Human-readable reasons; the description string below is part of the schema.
errorMessages: z.array(z.string()).describe(
|
||||
"List of error messages, if no recipe was found",
|
||||
),
|
||||
});
|
||||
|
||||
// Union of the two shapes a response may take: a full recipe or a
// `noRecipeSchema` error object.
// NOTE(review): this union is handed to `zodResponseFormat(...)` as the
// top-level response schema. OpenAI structured outputs document that the root
// schema must be an object and must not be `anyOf`; a root-level `z.union`
// presumably serializes to `anyOf` — confirm the request is accepted, or wrap
// the union in an object property.
// NOTE(review): callers that still validate the parsed result with
// `recipeSchema.parse` will throw on the error-object variant — confirm that
// is the intended handling.
export const recipeResponseSchema = z.union([recipeSchema, noRecipeSchema]);
|
||||
|
||||
export default recipeSchema;
|
||||
|
@ -1,6 +1,5 @@
|
||||
import {
|
||||
type DocumentChild,
|
||||
getTextOfChild,
|
||||
getTextOfRange,
|
||||
parseDocument,
|
||||
} from "@lib/documents.ts";
|
||||
@ -9,7 +8,7 @@ import { createCrud } from "@lib/crud.ts";
|
||||
import { extractHashTags } from "@lib/string.ts";
|
||||
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { fixRenderedMarkdown } from "@lib/helpers.ts";
|
||||
import { parseIngredient } from "@lib/parseIngredient.ts";
|
||||
import { parseIngredients } from "@lib/parseIngredient.ts";
|
||||
|
||||
export type Recipe = {
|
||||
type: "recipe";
|
||||
@ -33,72 +32,14 @@ export type Recipe = {
|
||||
};
|
||||
};
|
||||
|
||||
function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined {
|
||||
if (listItem.type === "listItem") {
|
||||
const children: DocumentChild[] = listItem.children[0]?.children ||
|
||||
listItem.children;
|
||||
|
||||
const text = children.map((c) => getTextOfChild(c)).join(" ").trim();
|
||||
|
||||
return parseIngredient(text);
|
||||
}
|
||||
}
|
||||
|
||||
const isIngredient = (item: Ingredient | undefined): item is Ingredient => {
|
||||
return !!item;
|
||||
};
|
||||
|
||||
function parseIngredientsList(list: DocumentChild): Ingredient[] {
|
||||
if (list.type === "list" && "children" in list) {
|
||||
return list.children.map((listItem) => {
|
||||
return parseIngredientItem(listItem);
|
||||
}).filter(isIngredient);
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
|
||||
const ingredients: (Ingredient | IngredientGroup)[] = [];
|
||||
if (!children) return [];
|
||||
let skip = false;
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
if (skip) {
|
||||
skip = false;
|
||||
continue;
|
||||
}
|
||||
const child = children[i];
|
||||
|
||||
if (child.type === "paragraph") {
|
||||
const nextChild = children[i + 1];
|
||||
|
||||
if (!nextChild || nextChild.type !== "list") continue;
|
||||
|
||||
const name = getTextOfChild(child);
|
||||
ingredients.push({
|
||||
name: name || "",
|
||||
items: parseIngredientsList(nextChild),
|
||||
});
|
||||
skip = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (child.type === "list") {
|
||||
ingredients.push(...parseIngredientsList(child));
|
||||
}
|
||||
}
|
||||
|
||||
return ingredients;
|
||||
}
|
||||
|
||||
function extractSteps(
|
||||
content: string,
|
||||
seperator: RegExp = /\n(?=\d+\.)/g,
|
||||
): string[] {
|
||||
const steps = content.split(seperator).map((step) => {
|
||||
const match = step.match(/^(\d+)\.\s*(.*)/);
|
||||
if (!match) return;
|
||||
const [, , text] = match;
|
||||
return text;
|
||||
if (match) return match[2];
|
||||
return step;
|
||||
}).filter((step) => !!step);
|
||||
return steps as string[];
|
||||
}
|
||||
@ -141,7 +82,14 @@ export function parseRecipe(original: string, id: string): Recipe {
|
||||
|
||||
let description = getTextOfRange(groups[0], original);
|
||||
|
||||
const ingredients = parseIngredients(groups[1]);
|
||||
let ingredientsText = getTextOfRange(groups[1], original);
|
||||
if (ingredientsText) {
|
||||
ingredientsText = ingredientsText.replace(/#+\s?Ingredients?/, "");
|
||||
} else {
|
||||
ingredientsText = "";
|
||||
}
|
||||
|
||||
const ingredients = parseIngredients(ingredientsText);
|
||||
|
||||
const instructionText = getTextOfRange(groups[2], original);
|
||||
let instructions = extractSteps(instructionText || "");
|
||||
|
@ -55,8 +55,6 @@ const isResource = (
|
||||
export async function searchResource(
|
||||
{ q, tags = [], types, authors, rating }: SearchParams,
|
||||
): Promise<GenericResource[]> {
|
||||
console.log("searchResource", { q, tags, types, authors, rating });
|
||||
|
||||
let resources = (await Promise.all([
|
||||
(!types || types.includes("movie")) && getAllMovies(),
|
||||
(!types || types.includes("series")) && getAllSeries(),
|
||||
|
@ -133,3 +133,41 @@ export function parseTimeCacheKey(key: string) {
|
||||
/**
 * Builds a `#`-prefixed colour string from three channel values.
 *
 * Each channel is converted with `componentToHex` and the three results are
 * appended in red, green, blue order.
 * (Presumably `componentToHex` yields a two-digit hex string — confirm there.)
 *
 * @param r Red channel value.
 * @param g Green channel value.
 * @param b Blue channel value.
 * @returns The `#` character followed by the three converted channels.
 */
export function rgbToHex(r: number, g: number, b: number) {
  const red = componentToHex(r);
  const green = componentToHex(g);
  const blue = componentToHex(b);
  return `${"#"}${red}${green}${blue}`;
}
|
||||
|
||||
/**
 * Strips common Markdown syntax from `text`, keeping the readable content.
 *
 * Passes run in three phases:
 *  1. Code, images and links (content kept for inline code and links,
 *     dropped for fenced code blocks and images).
 *  2. Line-level markers: headings, blockquotes, list markers and `---` rules.
 *  3. Inline emphasis: bold, italic and strikethrough.
 *
 * Line-level markers are handled BEFORE emphasis on purpose: a `*` bullet
 * followed by `*italic*` text on the same line would otherwise be paired up
 * by the italic pattern (`"* 2 cups *fine* flour"` became
 * `" 2 cups fine* flour"`), destroying the bullet and leaving a stray `*`.
 *
 * Unordered list markers (`-`, `*`, `+`) are not removed but normalized to a
 * bare `-` with no following whitespace; ordered list markers are removed.
 *
 * @param text Markdown source; an empty string is returned unchanged.
 * @returns The text with the Markdown syntax described above removed.
 */
export function removeMarkdownFormatting(text: string): string {
  let result = text;

  // --- Phase 1: code, images, links -------------------------------------
  // Fenced code blocks are dropped entirely, including their content.
  result = result.replace(/```[\s\S]*?```/g, "");
  // Inline code keeps its content.
  result = result.replace(/`([^`]+)`/g, "$1");
  // Images are dropped entirely (must run before links: same bracket syntax).
  result = result.replace(/!\[.*?\]\(.*?\)/g, "");
  // Links keep only their label.
  result = result.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1");

  // --- Phase 2: line-level markers --------------------------------------
  // Headings keep their title text.
  result = result.replace(/^#{1,6}\s*(.+)$/gm, "$1");
  // Blockquote prefixes are removed.
  result = result.replace(/^>\s*/gm, "");
  // Unordered list markers are normalized to a bare "-".
  result = result.replace(/^[-*+]\s+/gm, "-");
  // Ordered list markers are removed.
  result = result.replace(/^\d+\.\s+/gm, "");
  // Horizontal rules made of three or more dashes are removed.
  result = result.replace(/^---+$/gm, "");

  // --- Phase 3: inline emphasis -----------------------------------------
  // Bold first, so "**x**" is not half-consumed by the italic pattern.
  result = result.replace(/(\*\*|__)(.*?)\1/g, "$2");
  result = result.replace(/(\*|_)(.*?)\1/g, "$2");
  result = result.replace(/~~(.*?)~~/g, "$1");

  return result;
}
|
||||
|
Reference in New Issue
Block a user