feat: url scraper to recipe
This commit is contained in:
17
lib/crud.ts
17
lib/crud.ts
@ -85,6 +85,9 @@ export function createCrud<T extends GenericResource>(
|
||||
}
|
||||
|
||||
const content = await getDocument(path);
|
||||
if (!content) {
|
||||
return;
|
||||
}
|
||||
|
||||
const parsed = parse(content, id);
|
||||
|
||||
@ -92,12 +95,13 @@ export function createCrud<T extends GenericResource>(
|
||||
return addThumbnailToResource(parsed);
|
||||
}
|
||||
const doc = { ...parsed, content };
|
||||
cache.set(path, doc);
|
||||
cache.set(path, doc, { expires: 10 * 1000 });
|
||||
|
||||
return doc;
|
||||
}
|
||||
function create(id: string, content: string | ArrayBuffer | T) {
|
||||
const path = pathFromId(id);
|
||||
cache.set("all", undefined);
|
||||
if (
|
||||
typeof content === "string" || content instanceof ArrayBuffer
|
||||
) {
|
||||
@ -105,7 +109,9 @@ export function createCrud<T extends GenericResource>(
|
||||
}
|
||||
|
||||
if (render) {
|
||||
return createDocument(path, render(content));
|
||||
const rendered = render(content);
|
||||
cache.set(path, content);
|
||||
return createDocument(path, rendered);
|
||||
}
|
||||
|
||||
throw new Error("No renderer defined for " + prefix + " CRUD");
|
||||
@ -114,7 +120,11 @@ export function createCrud<T extends GenericResource>(
|
||||
async function update(id: string, updater: (r: Root) => Root) {
|
||||
const path = pathFromId(id);
|
||||
const content = await getDocument(path);
|
||||
if (!content) {
|
||||
return;
|
||||
}
|
||||
const newDoc = transformDocument(content, updater);
|
||||
cache.set("all", undefined);
|
||||
await createDocument(path, newDoc);
|
||||
}
|
||||
|
||||
@ -132,7 +142,8 @@ export function createCrud<T extends GenericResource>(
|
||||
const id = doc.name.replace(prefix, "").replace(/\.md$/, "");
|
||||
return read(id);
|
||||
}),
|
||||
)).sort(sortFunction<T>(sort));
|
||||
)).sort(sortFunction<T>(sort)).filter((v) => !!v);
|
||||
|
||||
cache.set("all", parsed);
|
||||
return parsed;
|
||||
}
|
||||
|
@ -58,6 +58,10 @@ export function createDocument(
|
||||
|
||||
log.info("creating document", { name });
|
||||
|
||||
if (typeof content === "string") {
|
||||
updateDocument(name, content).catch(log.error);
|
||||
}
|
||||
|
||||
return fetch(SILVERBULLET_SERVER + "/" + name, {
|
||||
body: content,
|
||||
method: "PUT",
|
||||
@ -65,25 +69,49 @@ export function createDocument(
|
||||
});
|
||||
}
|
||||
|
||||
export async function getDocument(name: string): Promise<string> {
|
||||
const documents = await db.select().from(documentTable).where(
|
||||
eq(documentTable.name, name),
|
||||
).limit(1);
|
||||
if (documents[0]?.content) return documents[0].content;
|
||||
|
||||
async function fetchDocument(name: string) {
|
||||
log.debug("fetching document", { name });
|
||||
const headers = new Headers();
|
||||
headers.append("X-Sync-Mode", "true");
|
||||
const response = await fetch(SILVERBULLET_SERVER + "/" + name, { headers });
|
||||
const text = await response.text();
|
||||
if (response.status === 404) {
|
||||
return;
|
||||
}
|
||||
return response.text();
|
||||
}
|
||||
|
||||
await db.update(documentTable).set({
|
||||
content: text,
|
||||
}).where(eq(documentTable.name, name));
|
||||
export async function getDocument(name: string): Promise<string | undefined> {
|
||||
const documents = await db.select().from(documentTable).where(
|
||||
eq(documentTable.name, name),
|
||||
).limit(1);
|
||||
// This updates the document in the background
|
||||
fetchDocument(name).then((content) => {
|
||||
if (content) {
|
||||
updateDocument(name, content);
|
||||
} else {
|
||||
db.delete(documentTable).where(eq(documentTable.name, name));
|
||||
}
|
||||
}).catch(
|
||||
log.error,
|
||||
);
|
||||
if (documents[0]?.content) return documents[0].content;
|
||||
|
||||
const text = await fetchDocument(name);
|
||||
if (!text) {
|
||||
db.delete(documentTable).where(eq(documentTable.name, name));
|
||||
return;
|
||||
}
|
||||
await updateDocument(name, text);
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
 * Overwrites the stored `content` of the `documentTable` row whose `name`
 * matches.
 *
 * The query built by `db.update(...)` is returned without being awaited, so
 * the caller decides whether to `await` it or attach a `.catch`.
 *
 * NOTE(review): this is a plain UPDATE, not an upsert — presumably a row for
 * `name` has to exist already for anything to be stored; confirm against the
 * code that inserts documents.
 *
 * @param name Document name used to select the row.
 * @param content New document text.
 */
export function updateDocument(name: string, content: string) {
  const pending = db.update(documentTable).set({ content });
  return pending.where(eq(documentTable.name, name));
}
|
||||
|
||||
export function transformDocument(input: string, cb: (r: Root) => Root) {
|
||||
const out = unified()
|
||||
.use(remarkParse)
|
||||
|
@ -1,7 +1,9 @@
|
||||
import OpenAI from "https://deno.land/x/openai@v4.52.0/mod.ts";
|
||||
import OpenAI from "https://deno.land/x/openai@v4.69.0/mod.ts";
|
||||
import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zod.ts";
|
||||
import { OPENAI_API_KEY } from "@lib/env.ts";
|
||||
import { hashString } from "@lib/helpers.ts";
|
||||
import { createCache } from "@lib/cache.ts";
|
||||
import recipeSchema from "@lib/recipeSchema.ts";
|
||||
|
||||
const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY });
|
||||
|
||||
@ -208,3 +210,21 @@ export async function createTags(content: string) {
|
||||
|
||||
return extractListFromResponse(res).map((v) => v.replaceAll(" ", "-"));
|
||||
}
|
||||
|
||||
/**
 * Asks the OpenAI chat-completions API to turn recipe markdown into an object
 * shaped like `recipeSchema`.
 *
 * @param content Markdown to extract the recipe from; sent as the user message.
 * @returns The first choice's parsed payload after re-validation with
 *   `recipeSchema.parse`, or `undefined` when no OpenAI client is configured.
 * @throws Whatever `recipeSchema.parse` throws when the payload does not match
 *   the schema. NOTE(review): presumably that includes a model refusal, where
 *   `parsed` would be empty — confirm against the SDK docs.
 */
export async function extractRecipe(content: string) {
  if (!openAI) {
    return;
  }

  const systemPrompt =
    "Extract the recipe information from the provided markdown.";

  const { choices } = await openAI.beta.chat.completions.parse({
    model: "gpt-4o-2024-08-06",
    temperature: 0.1,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content },
    ],
    response_format: zodResponseFormat(recipeSchema, "recipe-v2"),
  });

  const payload = choices[0].message.parsed;
  return recipeSchema.parse(payload);
}
|
||||
|
35
lib/parseIngredient.ts
Normal file
35
lib/parseIngredient.ts
Normal file
@ -0,0 +1,35 @@
|
||||
import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
|
||||
|
||||
/**
 * Extra units of measure passed to `parse-ingredient` on top of its built-in
 * ones (German spoon abbreviations, litres and "Paket").
 *
 * Hoisted to module level so the object is built once instead of on every
 * call.
 */
const additionalUOMs = {
  tableSpoon: {
    short: "EL",
    plural: "Table Spoons",
    alternates: ["el", "EL", "Tbsp", "tbsp"],
  },
  teaSpoon: {
    short: "TL",
    plural: "Tea Spoons",
    alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
  },
  litre: {
    short: "L",
    plural: "liters",
    alternates: ["L", "l"],
  },
  paket: {
    short: "Paket",
    plural: "Pakets",
    alternates: ["Paket", "paket"],
  },
};

/**
 * Parses one line of ingredient text into the project's ingredient shape.
 *
 * Only the first ingredient returned by `parse-ingredient` is used.
 *
 * @param text A single ingredient line, e.g. "2 EL Zucker".
 * @returns `{ name, unit, quantity, note }`, all strings. `unit` and
 *   `quantity` are empty strings when the parser found none; `note` is always
 *   empty. If the parser returns no ingredient at all (e.g. for empty or
 *   whitespace-only text), the trimmed input is returned as `name` instead of
 *   throwing on a missing first element.
 */
export function parseIngredient(text: string) {
  const [first] = _parseIngredient(text, { additionalUOMs });

  // Guard: an empty result would otherwise crash on `first.description`.
  if (!first) {
    return { name: text.trim(), unit: "", quantity: "", note: "" };
  }

  return {
    name: first.description,
    unit: first.unitOfMeasure || "",
    quantity: first.quantity?.toString() || "",
    note: "",
  };
}
|
55
lib/playwright.ts
Normal file
55
lib/playwright.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import { firefox } from "npm:playwright-extra";
|
||||
import { createStreamResponse } from "@lib/helpers.ts";
|
||||
import StealthPlugin from "npm:puppeteer-extra-plugin-stealth";
|
||||
|
||||
/**
 * Pool of desktop user-agent strings; `fetchHtmlWithPlaywright` picks one at
 * random for each browser context it opens.
 *
 * NOTE(review): every entry identifies as Chrome while the browser that is
 * launched is Firefox — confirm this mismatch is intended.
 */
const userAgentStrings = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.2227.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.3497.92 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
];

// Register the stealth plugin on the Firefox launcher at module load time.
firefox.use(StealthPlugin());
|
||||
|
||||
/**
 * Loads a page in a freshly launched Playwright Firefox instance and returns
 * its HTML.
 *
 * Status messages are pushed to `streamResponse` along the way, and the
 * browser is closed on every path out of the `try` block.
 *
 * @param fetchUrl URL to navigate to.
 * @param streamResponse Stream helper that receives progress text via `enqueue`.
 * @returns The page's HTML, or an empty string if anything inside the `try`
 *   block fails (the error is written with `console.error`). A failure of
 *   `firefox.launch()` itself is not caught here and rejects the promise.
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  streamResponse.enqueue("booting up playwright");
  const browser = await firefox.launch();
  streamResponse.enqueue("fetching html");

  try {
    // Use a randomly chosen user agent for this context.
    const pick = Math.floor(Math.random() * userAgentStrings.length);
    const context = await browser.newContext({
      userAgent: userAgentStrings[pick],
    });

    // Make `navigator.webdriver` read as undefined for scripts on the page.
    await context.addInitScript(
      "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})",
    );

    const page = await context.newPage();

    // Resolve once the DOM content has loaded.
    await page.goto(fetchUrl, { waitUntil: "domcontentloaded" });

    return await page.content();
  } catch (error) {
    streamResponse.enqueue("error fetching html");
    console.error(error);
    return "";
  } finally {
    await browser.close();
  }
}
|
39
lib/recipeSchema.ts
Normal file
39
lib/recipeSchema.ts
Normal file
@ -0,0 +1,39 @@
|
||||
import { z } from "npm:zod";
|
||||
|
||||
/**
 * Zod schema for a single ingredient.
 *
 * All four fields are required strings; "no value" is expressed as an empty
 * string (see the `quantity` description), even where a description calls
 * the field optional.
 */
export const IngredientSchema = z.object({
  quantity: z
    .string()
    .describe("e.g., '2', '1/2', or an empty string for 'to taste'"),
  unit: z.string().describe('e.g., "g", "tbsp", "cup"'),
  name: z.string().describe('e.g., "sugar", "flour"'),
  note: z.string().describe('optional, e.g., "sifted", "chopped finely"'),
});

/** Static type inferred from `IngredientSchema`. */
export type Ingredient = z.infer<typeof IngredientSchema>;
|
||||
|
||||
/**
 * Zod schema for a named group of ingredients: a `name` plus the list of
 * `items`, each validated by `IngredientSchema`.
 */
export const IngredientGroupSchema = z.object({
  name: z.string(),
  items: IngredientSchema.array(),
});

/** Static type inferred from `IngredientGroupSchema`. */
export type IngredientGroup = z.infer<typeof IngredientGroupSchema>;
|
||||
|
||||
/**
 * Zod schema describing a full recipe.
 *
 * Each `describe(...)` text is attached to its field as the description.
 * Every field is required by the schema, including those whose description
 * says "optional".
 *
 * `ingredients` entries are either a single ingredient or a named group of
 * ingredients.
 */
const recipeSchema = z.object({
  title: z
    .string()
    .describe("Title of the Recipe, without the name of the website or author"),
  image: z.string().describe("URL of the main image of the recipe"),
  author: z.string().describe("author of the Recipe (optional)"),
  description: z.string().describe("Optional, short description of the recipe"),
  ingredients: z
    .array(z.union([IngredientSchema, IngredientGroupSchema]))
    .describe("List of ingredients"),
  instructions: z.array(z.string()).describe("List of instructions"),
  servings: z.number().describe("Amount of Portions"),
  prepTime: z.number().describe("Preparation time in minutes"),
  cookTime: z.number().describe("Cooking time in minutes"),
  totalTime: z.number().describe("Total time in minutes"),
  tags: z
    .array(z.string())
    .describe("List of tags (e.g., ['vegan', 'dessert'])"),
  notes: z.array(z.string()).describe("Optional notes about the recipe"),
});

export default recipeSchema;
|
@ -4,31 +4,22 @@ import {
|
||||
getTextOfRange,
|
||||
parseDocument,
|
||||
} from "@lib/documents.ts";
|
||||
import { parse } from "yaml";
|
||||
import { parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1";
|
||||
import { parse, stringify } from "yaml";
|
||||
import { createCrud } from "@lib/crud.ts";
|
||||
import { extractHashTags } from "@lib/string.ts";
|
||||
|
||||
export type IngredientGroup = {
|
||||
name: string;
|
||||
ingredients: Ingredient[];
|
||||
};
|
||||
|
||||
export type Ingredient = {
|
||||
type: string;
|
||||
unit?: string;
|
||||
amount?: string;
|
||||
};
|
||||
|
||||
export type Ingredients = (Ingredient | IngredientGroup)[];
|
||||
import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts";
|
||||
import { fixRenderedMarkdown } from "@lib/helpers.ts";
|
||||
import { parseIngredient } from "@lib/parseIngredient.ts";
|
||||
|
||||
export type Recipe = {
|
||||
type: "recipe";
|
||||
id: string;
|
||||
name: string;
|
||||
description?: string;
|
||||
ingredients: Ingredients;
|
||||
preparation?: string;
|
||||
markdown?: string;
|
||||
ingredients: (Ingredient | IngredientGroup)[];
|
||||
instructions?: string[];
|
||||
notes?: string[];
|
||||
tags: string[];
|
||||
meta?: {
|
||||
time?: string;
|
||||
@ -49,38 +40,8 @@ function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined {
|
||||
|
||||
const text = children.map((c) => getTextOfChild(c)).join(" ").trim();
|
||||
|
||||
const ing = parseIngredient(text, {
|
||||
additionalUOMs: {
|
||||
tableSpoon: {
|
||||
short: "EL",
|
||||
plural: "Table Spoons",
|
||||
alternates: ["el", "EL", "Tbsp", "tbsp"],
|
||||
},
|
||||
teaSpoon: {
|
||||
short: "TL",
|
||||
plural: "Tea Spoon",
|
||||
alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
|
||||
},
|
||||
litre: {
|
||||
short: "L",
|
||||
plural: "liters",
|
||||
alternates: ["L", "l"],
|
||||
},
|
||||
paket: {
|
||||
short: "Paket",
|
||||
plural: "Pakets",
|
||||
alternates: ["Paket", "paket"],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
return {
|
||||
type: ing[0].description,
|
||||
unit: ing[0].unitOfMeasure,
|
||||
amount: ing[0].quantity,
|
||||
};
|
||||
return parseIngredient(text);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const isIngredient = (item: Ingredient | undefined): item is Ingredient => {
|
||||
@ -112,9 +73,10 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
|
||||
|
||||
if (!nextChild || nextChild.type !== "list") continue;
|
||||
|
||||
const name = getTextOfChild(child);
|
||||
ingredients.push({
|
||||
name: getTextOfChild(child) || "",
|
||||
ingredients: parseIngredientsList(nextChild),
|
||||
name: name || "",
|
||||
items: parseIngredientsList(nextChild),
|
||||
});
|
||||
skip = true;
|
||||
continue;
|
||||
@ -128,6 +90,19 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] {
|
||||
return ingredients;
|
||||
}
|
||||
|
||||
/**
 * Splits numbered instruction text ("1. …", "2. …") into the text of each
 * step, without the leading number.
 *
 * @param content Raw instruction text.
 * @param seperator Pattern used to split `content` into candidate steps.
 *   The default splits on a newline that is directly followed by "<digits>.",
 *   so continuation lines stay attached to their step.
 * @returns The step texts in order. Chunks that do not start with
 *   "<digits>." and steps with no text are left out.
 */
function extractSteps(
  content: string,
  seperator: RegExp = /\n(?=\d+\.)/g,
): string[] {
  return content
    .split(seperator)
    .map((chunk) =>
      // Trim first so leading blank lines/indentation do not defeat the `^`
      // anchor. `[\s\S]*` (rather than `.*`) keeps the continuation lines of
      // a multi-line step instead of cutting it off at the first newline.
      chunk.trim().match(/^\d+\.\s*([\s\S]*)$/)?.[1]?.trim()
    )
    .filter((text): text is string => !!text);
}
|
||||
|
||||
export function parseRecipe(original: string, id: string): Recipe {
|
||||
const doc = parseDocument(original);
|
||||
|
||||
@ -140,8 +115,8 @@ export function parseRecipe(original: string, id: string): Recipe {
|
||||
if (child.type === "yaml") {
|
||||
try {
|
||||
meta = parse(child.value) as Recipe["meta"];
|
||||
} catch (_) {
|
||||
// console.log("Error parsing YAML", err);
|
||||
} catch (err) {
|
||||
console.log("Error parsing YAML", err);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -168,7 +143,14 @@ export function parseRecipe(original: string, id: string): Recipe {
|
||||
|
||||
const ingredients = parseIngredients(groups[1]);
|
||||
|
||||
const preparation = getTextOfRange(groups[2], original);
|
||||
const instructionText = getTextOfRange(groups[2], original);
|
||||
let instructions = extractSteps(instructionText || "");
|
||||
if (instructions.length <= 1) {
|
||||
const d = extractSteps(instructionText || "", /\n/g);
|
||||
if (d.length > instructions.length) {
|
||||
instructions = d;
|
||||
}
|
||||
}
|
||||
|
||||
const tags = extractHashTags(description || "");
|
||||
if (description) {
|
||||
@ -183,15 +165,88 @@ export function parseRecipe(original: string, id: string): Recipe {
|
||||
meta,
|
||||
name,
|
||||
tags,
|
||||
markdown: original,
|
||||
notes: getTextOfRange(groups[3], original)?.split("\n"),
|
||||
description,
|
||||
ingredients,
|
||||
preparation,
|
||||
instructions,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Returns a new object holding only the entries of `obj` whose value is not
 * `undefined`. Other falsy values (`null`, `0`, `""`, `false`) are kept and
 * `obj` itself is left untouched.
 */
function filterUndefinedFromObject<T extends { [key: string]: unknown }>(
  obj: T,
) {
  const definedEntries: [string, unknown][] = [];
  for (const entry of Object.entries(obj)) {
    if (entry[1] !== undefined) {
      definedEntries.push(entry);
    }
  }
  return Object.fromEntries(definedEntries);
}
|
||||
|
||||
/**
 * Renders one ingredient as a markdown list item.
 *
 * The amount (quantity directly followed by unit) is bolded when either part
 * is present, so "2 eggs" keeps its quantity even though it has no unit.
 */
function renderIngredientLine(ing: Ingredient): string {
  const amount = `${ing.quantity?.trim() ?? ""}${ing.unit?.trim() ?? ""}`;
  return amount ? `- **${amount}** ${ing.name}` : `- ${ing.name}`;
}

/**
 * Serialises a recipe to markdown.
 *
 * Layout, top to bottom: YAML front matter built from `recipe.meta`, a title
 * heading, the image (if `meta.image` is set), the tags as `#hashtags`, the
 * description, then the ingredients, the numbered instructions and the notes,
 * separated by `---` rules. The assembled text is passed through
 * `fixRenderedMarkdown` before being returned.
 *
 * @param recipe Recipe to render. It is not modified; `meta` is copied first.
 * @returns The value returned by `fixRenderedMarkdown` for the rendered text.
 */
export function renderRecipe(recipe: Recipe) {
  const meta = filterUndefinedFromObject(recipe.meta || {});

  // These two keys are never written to the front matter.
  // NOTE(review): presumably they are derived at read time — confirm.
  delete meta.thumbnail;
  delete meta.average;

  // Markdown image for the recipe picture; empty when there is no image.
  const recipeImage = meta.image ? `![](${meta.image})` : "";

  // Groups render as an italic heading followed by their items; standalone
  // ingredients render as plain list items.
  const ingredients = recipe.ingredients
    .map((item) => {
      if ("items" in item) {
        const lines = item.items.map((ing) => renderIngredientLine(ing));
        return `\n*${item.name}*\n${lines.join("\n")}`;
      }
      return renderIngredientLine(item);
    })
    .join("\n");

  // Numbered list, one step per line.
  const instructions = (recipe.instructions ?? [])
    .map((step, i) => `${i + 1}. ${step}`)
    .join("\n");

  // An empty front matter block is still emitted when there is no meta.
  const frontmatter = Object.keys(meta).length
    ? `---\n${stringify(meta)}\n---`
    : `---\n---`;

  return fixRenderedMarkdown(`${frontmatter}
# ${recipe.name}
${recipeImage}
${recipe.tags.map((t) => `#${t.replaceAll(" ", "-")}`).join(" ")}
${recipe.description || ""}

---

${ingredients ? `## Ingredients\n\n${ingredients}\n\n---\n` : ""}
${instructions ? `${instructions}\n\n---` : ""}
${recipe.notes?.length ? `\n${recipe.notes.join("\n")}` : ""}
`);
}
|
||||
|
||||
/**
 * Recipe CRUD helpers produced by `createCrud`, configured with the
 * `Recipes/` prefix, `parseRecipe` / `renderRecipe` for (de)serialisation and
 * thumbnails switched on.
 */
const crud = createCrud<Recipe>({
  prefix: "Recipes/",
  parse: parseRecipe,
  render: renderRecipe,
  hasThumbnails: true,
});
|
||||
|
||||
|
@ -6,17 +6,10 @@ export function formatDate(date: Date): string {
|
||||
}
|
||||
|
||||
/**
 * Derives a file-name-safe string from arbitrary text.
 *
 * Steps: lowercase the input, turn each space into `_`, then drop every
 * character that is not an ASCII letter, digit, `_`, `.` or `-`.
 *
 * NOTE: `\w` is ASCII-only here, so non-ASCII letters (e.g. umlauts) are
 * removed rather than transliterated.
 *
 * @param inputString Text to convert.
 * @returns The sanitised name; may be empty if nothing safe remains.
 */
export function safeFileName(inputString: string): string {
  return inputString
    .toLowerCase()
    .replace(/ /g, "_")
    // `:` is already excluded by this character class, so the former
    // separate `replaceAll(":", "")` pass was a no-op and is dropped.
    .replace(/[^\w.-]/g, "");
}
|
||||
|
||||
|
Reference in New Issue
Block a user