feat: url scraper to recipe
This commit is contained in:
		| @@ -7,6 +7,7 @@ WORKDIR /app | ||||
| COPY . . | ||||
|  | ||||
| RUN apk add curl libstdc++ &&\ | ||||
|     deno run -A npm:playwright install firefox &&\ | ||||
|     deno install --allow-import --allow-ffi --allow-scripts=npm:sharp@0.33.5-rc.1 -e main.ts &&\ | ||||
|     sed -i -e 's/"deno"/"no-deno"/' node_modules/@libsql/client/package.json &&\ | ||||
|     mkdir -p $DATA_DIR | ||||
|   | ||||
							
								
								
									
										494
									
								
								article.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										494
									
								
								article.html
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -95,7 +95,7 @@ export function Card( | ||||
| export function ResourceCard( | ||||
|   { res, sublink = "movies" }: { sublink?: string; res: GenericResource }, | ||||
| ) { | ||||
|   const { meta: { image } = {} } = res; | ||||
|   const { meta: { image } = {} } = res || {}; | ||||
|  | ||||
|   const imageUrl = image | ||||
|     ? `/api/images?image=${image}&width=200&height=200` | ||||
|   | ||||
| @@ -1,22 +1,22 @@ | ||||
| import { ComponentChildren } from "preact"; | ||||
| import Search from "@islands/Search.tsx"; | ||||
| import { GenericResource, SearchResult } from "@lib/types.ts"; | ||||
| import { GenericResource } from "@lib/types.ts"; | ||||
|  | ||||
| export type Props = { | ||||
|   children: ComponentChildren; | ||||
|   title?: string; | ||||
|   name?: string; | ||||
|   url: URL; | ||||
|   url: URL | string; | ||||
|   description?: string; | ||||
|   context?: { type: string }; | ||||
|   searchResults?: GenericResource[]; | ||||
| }; | ||||
|  | ||||
| export const MainLayout = ( | ||||
|   { children, url, title, context, searchResults }: Props, | ||||
|   { children, url, context, searchResults }: Props, | ||||
| ) => { | ||||
|   const _url = typeof url === "string" ? new URL(url) : url; | ||||
|   const hasSearch = _url.search.includes("q="); | ||||
|   const hasSearch = _url?.search?.includes("q="); | ||||
|  | ||||
|   if (hasSearch) { | ||||
|     return ( | ||||
|   | ||||
							
								
								
									
										11
									
								
								deno.json
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								deno.json
									
									
									
									
									
								
							| @@ -34,25 +34,24 @@ | ||||
|     "drizzle-kit": "npm:drizzle-kit@^0.30.1", | ||||
|     "drizzle-orm": "npm:drizzle-orm@^0.38.3", | ||||
|     "fuzzysort": "npm:fuzzysort@^3.1.0", | ||||
|     "playwright": "npm:playwright@^1.49.1", | ||||
|     "playwright-extra": "npm:playwright-extra@^4.3.6", | ||||
|     "preact": "https://esm.sh/preact@10.22.0", | ||||
|     "preact-render-to-string": "https://esm.sh/*preact-render-to-string@6.2.2", | ||||
|     "preact/": "https://esm.sh/preact@10.22.0/", | ||||
|     "gfm": "jsr:@deno/gfm", | ||||
|     "puppeteer-extra-plugin-stealth": "npm:puppeteer-extra-plugin-stealth@^2.11.2", | ||||
|     "tailwindcss": "npm:tailwindcss@^3.4.17", | ||||
|     "tailwindcss/": "npm:/tailwindcss@^3.4.17/", | ||||
|     "tailwindcss/plugin": "npm:/tailwindcss@^3.4.17/plugin.js", | ||||
|     "camelcase-css": "npm:camelcase-css", | ||||
|     "tsx": "npm:tsx@^4.19.2", | ||||
|     "yaml": "https://deno.land/std@0.197.0/yaml/mod.ts", | ||||
|     "zod": "https://deno.land/x/zod@v3.21.4/mod.ts", | ||||
|     "zod": "npm:zod@^3.24.1", | ||||
|     "domparser": "https://deno.land/x/deno_dom@v0.1.48/deno-dom-wasm.ts", | ||||
|     "fs": "https://deno.land/std/fs/mod.ts", | ||||
|     "imagemagick": "https://deno.land/x/imagemagick_deno@0.0.31/mod.ts" | ||||
|   }, | ||||
|   "scopes": { | ||||
|     "https://deno.land/x/emoji/": { | ||||
|       "https://deno.land/x/my-library@1.0.0/mod.ts": "./patched/mod.ts" | ||||
|     } | ||||
|   }, | ||||
|   "compilerOptions": { | ||||
|     "jsx": "react-jsx", | ||||
|     "jsxImportSource": "preact" | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import * as $api_articles_index from "./routes/api/articles/index.ts"; | ||||
| import * as $api_auth_callback from "./routes/api/auth/callback.ts"; | ||||
| import * as $api_auth_login from "./routes/api/auth/login.ts"; | ||||
| import * as $api_auth_logout from "./routes/api/auth/logout.ts"; | ||||
| import * as $api_cache from "./routes/api/cache.ts"; | ||||
| import * as $api_images_index from "./routes/api/images/index.ts"; | ||||
| import * as $api_index from "./routes/api/index.ts"; | ||||
| import * as $api_logs from "./routes/api/logs.ts"; | ||||
| @@ -22,6 +23,8 @@ import * as $api_movies_enhance_name_ from "./routes/api/movies/enhance/[name].t | ||||
| import * as $api_movies_index from "./routes/api/movies/index.ts"; | ||||
| import * as $api_query_index from "./routes/api/query/index.ts"; | ||||
| import * as $api_recipes_name_ from "./routes/api/recipes/[name].ts"; | ||||
| import * as $api_recipes_create_index from "./routes/api/recipes/create/index.ts"; | ||||
| import * as $api_recipes_create_parseJsonLd from "./routes/api/recipes/create/parseJsonLd.ts"; | ||||
| import * as $api_recipes_index from "./routes/api/recipes/index.ts"; | ||||
| import * as $api_recommendation_all from "./routes/api/recommendation/all.ts"; | ||||
| import * as $api_recommendation_data from "./routes/api/recommendation/data.ts"; | ||||
| @@ -50,6 +53,7 @@ import * as $KMenu_commands_add_movie_infos from "./islands/KMenu/commands/add_m | ||||
| import * as $KMenu_commands_add_series_infos from "./islands/KMenu/commands/add_series_infos.ts"; | ||||
| import * as $KMenu_commands_create_article from "./islands/KMenu/commands/create_article.ts"; | ||||
| import * as $KMenu_commands_create_movie from "./islands/KMenu/commands/create_movie.ts"; | ||||
| import * as $KMenu_commands_create_recipe from "./islands/KMenu/commands/create_recipe.ts"; | ||||
| import * as $KMenu_commands_create_recommendations from "./islands/KMenu/commands/create_recommendations.ts"; | ||||
| import * as $KMenu_commands_create_series from "./islands/KMenu/commands/create_series.ts"; | ||||
| import * as $KMenu_types from "./islands/KMenu/types.ts"; | ||||
| @@ -71,6 +75,7 @@ const manifest = { | ||||
|     "./routes/api/auth/callback.ts": $api_auth_callback, | ||||
|     "./routes/api/auth/login.ts": $api_auth_login, | ||||
|     "./routes/api/auth/logout.ts": $api_auth_logout, | ||||
|     "./routes/api/cache.ts": $api_cache, | ||||
|     "./routes/api/images/index.ts": $api_images_index, | ||||
|     "./routes/api/index.ts": $api_index, | ||||
|     "./routes/api/logs.ts": $api_logs, | ||||
| @@ -79,6 +84,9 @@ const manifest = { | ||||
|     "./routes/api/movies/index.ts": $api_movies_index, | ||||
|     "./routes/api/query/index.ts": $api_query_index, | ||||
|     "./routes/api/recipes/[name].ts": $api_recipes_name_, | ||||
|     "./routes/api/recipes/create/index.ts": $api_recipes_create_index, | ||||
|     "./routes/api/recipes/create/parseJsonLd.ts": | ||||
|       $api_recipes_create_parseJsonLd, | ||||
|     "./routes/api/recipes/index.ts": $api_recipes_index, | ||||
|     "./routes/api/recommendation/all.ts": $api_recommendation_all, | ||||
|     "./routes/api/recommendation/data.ts": $api_recommendation_data, | ||||
| @@ -112,6 +120,7 @@ const manifest = { | ||||
|     "./islands/KMenu/commands/create_article.ts": | ||||
|       $KMenu_commands_create_article, | ||||
|     "./islands/KMenu/commands/create_movie.ts": $KMenu_commands_create_movie, | ||||
|     "./islands/KMenu/commands/create_recipe.ts": $KMenu_commands_create_recipe, | ||||
|     "./islands/KMenu/commands/create_recommendations.ts": | ||||
|       $KMenu_commands_create_recommendations, | ||||
|     "./islands/KMenu/commands/create_series.ts": $KMenu_commands_create_series, | ||||
|   | ||||
| @@ -1,15 +1,15 @@ | ||||
| import { Signal } from "@preact/signals"; | ||||
| import type { | ||||
|   Ingredient, | ||||
|   IngredientGroup, | ||||
|   Ingredients, | ||||
| } from "../lib/recipes.ts"; | ||||
| import type { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts"; | ||||
| import { FunctionalComponent } from "preact"; | ||||
|  | ||||
/**
 * Formats a number for display, rounded down to the nearest quarter
 * (e.g. 1.3 becomes "1.25").
 */
function numberToString(num: number) {
  const quarters = Math.floor(num * 4);
  return String(quarters / 4);
}
|  | ||||
/**
 * Converts a quantity string into a number.
 *
 * Besides plain decimals ("2", "1.5") this understands simple fractions
 * ("1/2") and mixed numbers ("1 1/2"), which `parseFloat` alone would cut off
 * at the slash. Anything else is handed to `parseFloat`, so unparsable input
 * (including the empty string) yields `NaN`.
 *
 * @param str quantity text
 * @returns the numeric value, or `NaN` when no number can be read
 */
function stringToNumber(str: string) {
  const text = str.trim();

  // Optional whole part, then numerator "/" denominator.
  const fraction = text.match(/^(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+)$/);
  if (fraction) {
    const [, whole = "0", numerator, denominator] = fraction;
    const divisor = Number(denominator);
    // A zero denominator is not a usable quantity.
    if (divisor === 0) return NaN;
    return Number(whole) + Number(numerator) / divisor;
  }

  return parseFloat(text);
}
|  | ||||
| const Ingredient = ( | ||||
|   { ingredient, amount, key = "", portion = 1 }: { | ||||
|     ingredient: Ingredient; | ||||
| @@ -18,10 +18,12 @@ const Ingredient = ( | ||||
|     portion?: number; | ||||
|   }, | ||||
| ) => { | ||||
|   const { type, amount: _amount, unit } = ingredient; | ||||
|   const { name, quantity, unit } = ingredient; | ||||
|  | ||||
|   const finalAmount = (typeof _amount === "number" && amount) | ||||
|     ? (_amount / portion) * (amount?.value || 1) | ||||
|   const parsedQuantity = stringToNumber(quantity); | ||||
|  | ||||
|   const finalAmount = (typeof parsedQuantity === "number" && amount) | ||||
|     ? (parsedQuantity / portion) * (amount?.value || 1) | ||||
|     : ""; | ||||
|  | ||||
|   return ( | ||||
| @@ -30,13 +32,17 @@ const Ingredient = ( | ||||
|         {numberToString(finalAmount || 0) + | ||||
|           (typeof unit === "string" ? unit : "")} | ||||
|       </td> | ||||
|       <td class="px-4 py-2">{type}</td> | ||||
|       <td class="px-4 py-2">{name}</td> | ||||
|     </tr> | ||||
|   ); | ||||
| }; | ||||
|  | ||||
| export const IngredientsList: FunctionalComponent< | ||||
|   { ingredients: Ingredients; amount: Signal<number>; portion?: number } | ||||
|   { | ||||
|     ingredients: (Ingredient | IngredientGroup)[]; | ||||
|     amount: Signal<number>; | ||||
|     portion?: number; | ||||
|   } | ||||
| > = ( | ||||
|   { ingredients, amount, portion }, | ||||
| ) => { | ||||
| @@ -44,10 +50,9 @@ export const IngredientsList: FunctionalComponent< | ||||
|     <table class="w-full border-collapse table-auto"> | ||||
|       <tbody> | ||||
|         {ingredients.map((item, index) => { | ||||
|           if ("name" in item) { | ||||
|           if ("items" in item) { | ||||
|             // Render IngredientGroup | ||||
|             const { name, ingredients: groupIngredients } = | ||||
|               item as IngredientGroup; | ||||
|             const { name, items: groupIngredients } = item as IngredientGroup; | ||||
|  | ||||
|             return ( | ||||
|               <> | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import { getCookie } from "@lib/string.ts"; | ||||
| import { addSeriesInfo } from "@islands/KMenu/commands/add_series_infos.ts"; | ||||
| import { createNewSeries } from "@islands/KMenu/commands/create_series.ts"; | ||||
| import { updateAllRecommendations } from "@islands/KMenu/commands/create_recommendations.ts"; | ||||
| import { createNewRecipe } from "@islands/KMenu/commands/create_recipe.ts"; | ||||
|  | ||||
| export const menus: Record<string, Menu> = { | ||||
|   main: { | ||||
| @@ -74,6 +75,7 @@ export const menus: Record<string, Menu> = { | ||||
|       createNewArticle, | ||||
|       createNewMovie, | ||||
|       createNewSeries, | ||||
|       createNewRecipe, | ||||
|       addMovieInfos, | ||||
|       updateAllRecommendations, | ||||
|     ], | ||||
|   | ||||
							
								
								
									
										46
									
								
								islands/KMenu/commands/create_recipe.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								islands/KMenu/commands/create_recipe.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| import { MenuEntry } from "@islands/KMenu/types.ts"; | ||||
| import { fetchStream, isValidUrl } from "@lib/helpers.ts"; | ||||
| import { getCookie } from "@lib/string.ts"; | ||||
|  | ||||
/**
 * KMenu command that creates a recipe from a pasted link.
 *
 * Selecting the entry switches the menu into a "Link:" input state. As soon as
 * the command input holds a valid URL it is sent to `/api/recipes/create`; the
 * streamed progress chunks are shown as loading text, and a chunk starting
 * with `id:` redirects the browser to the new recipe page.
 */
export const createNewRecipe: MenuEntry = {
  title: "Create new recipe",
  meta: "",
  icon: "IconSquareRoundedPlus",
  cb: (state) => {
    state.menus["input_link"] = {
      title: "Link:",
      entries: [],
    };

    state.activeMenu.value = "input_link";
    state.activeState.value = "input";

    const unsub = state.commandInput.subscribe((value) => {
      if (isValidUrl(value)) {
        // Only react to the first valid URL.
        unsub();

        state.activeState.value = "loading";

        // Encode the link: an unescaped "&", "#" or "+" in the recipe URL
        // would otherwise be read as part of this request's own query string.
        const endpoint = "/api/recipes/create?url=" +
          encodeURIComponent(value);

        fetchStream(endpoint, (chunk) => {
          if (chunk.startsWith("id:")) {
            state.loadingText.value = "Finished";
            // Short delay so the "Finished" text is visible before navigating.
            setTimeout(() => {
              globalThis.location.href = "/recipes/" +
                chunk.replace("id:", "").trim();
            }, 500);
          } else {
            state.loadingText.value = chunk;
          }
        });
      }
    });
  },
  // Shown only with a session cookie, on the start page or a recipes page.
  visible: () => {
    if (!getCookie("session_cookie")) return false;
    if (
      !globalThis?.location?.pathname?.includes("recipes") &&
      globalThis?.location?.pathname !== "/"
    ) return false;
    return true;
  },
};
							
								
								
									
										17
									
								
								lib/crud.ts
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								lib/crud.ts
									
									
									
									
									
								
							| @@ -85,6 +85,9 @@ export function createCrud<T extends GenericResource>( | ||||
|     } | ||||
|  | ||||
|     const content = await getDocument(path); | ||||
|     if (!content) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     const parsed = parse(content, id); | ||||
|  | ||||
| @@ -92,12 +95,13 @@ export function createCrud<T extends GenericResource>( | ||||
|       return addThumbnailToResource(parsed); | ||||
|     } | ||||
|     const doc = { ...parsed, content }; | ||||
|     cache.set(path, doc); | ||||
|     cache.set(path, doc, { expires: 10 * 1000 }); | ||||
|  | ||||
|     return doc; | ||||
|   } | ||||
|   function create(id: string, content: string | ArrayBuffer | T) { | ||||
|     const path = pathFromId(id); | ||||
|     cache.set("all", undefined); | ||||
|     if ( | ||||
|       typeof content === "string" || content instanceof ArrayBuffer | ||||
|     ) { | ||||
| @@ -105,7 +109,9 @@ export function createCrud<T extends GenericResource>( | ||||
|     } | ||||
|  | ||||
|     if (render) { | ||||
|       return createDocument(path, render(content)); | ||||
|       const rendered = render(content); | ||||
|       cache.set(path, content); | ||||
|       return createDocument(path, rendered); | ||||
|     } | ||||
|  | ||||
|     throw new Error("No renderer defined for " + prefix + " CRUD"); | ||||
| @@ -114,7 +120,11 @@ export function createCrud<T extends GenericResource>( | ||||
|   async function update(id: string, updater: (r: Root) => Root) { | ||||
|     const path = pathFromId(id); | ||||
|     const content = await getDocument(path); | ||||
|     if (!content) { | ||||
|       return; | ||||
|     } | ||||
|     const newDoc = transformDocument(content, updater); | ||||
|     cache.set("all", undefined); | ||||
|     await createDocument(path, newDoc); | ||||
|   } | ||||
|  | ||||
| @@ -132,7 +142,8 @@ export function createCrud<T extends GenericResource>( | ||||
|         const id = doc.name.replace(prefix, "").replace(/\.md$/, ""); | ||||
|         return read(id); | ||||
|       }), | ||||
|     )).sort(sortFunction<T>(sort)); | ||||
|     )).sort(sortFunction<T>(sort)).filter((v) => !!v); | ||||
|  | ||||
|     cache.set("all", parsed); | ||||
|     return parsed; | ||||
|   } | ||||
|   | ||||
| @@ -58,6 +58,10 @@ export function createDocument( | ||||
|  | ||||
|   log.info("creating document", { name }); | ||||
|  | ||||
|   if (typeof content === "string") { | ||||
|     updateDocument(name, content).catch(log.error); | ||||
|   } | ||||
|  | ||||
|   return fetch(SILVERBULLET_SERVER + "/" + name, { | ||||
|     body: content, | ||||
|     method: "PUT", | ||||
| @@ -65,25 +69,49 @@ export function createDocument( | ||||
|   }); | ||||
| } | ||||
|  | ||||
| export async function getDocument(name: string): Promise<string> { | ||||
|   const documents = await db.select().from(documentTable).where( | ||||
|     eq(documentTable.name, name), | ||||
|   ).limit(1); | ||||
|   if (documents[0]?.content) return documents[0].content; | ||||
|  | ||||
| async function fetchDocument(name: string) { | ||||
|   log.debug("fetching document", { name }); | ||||
|   const headers = new Headers(); | ||||
|   headers.append("X-Sync-Mode", "true"); | ||||
|   const response = await fetch(SILVERBULLET_SERVER + "/" + name, { headers }); | ||||
|   const text = await response.text(); | ||||
|   if (response.status === 404) { | ||||
|     return; | ||||
|   } | ||||
|   return response.text(); | ||||
| } | ||||
|  | ||||
|   await db.update(documentTable).set({ | ||||
|     content: text, | ||||
|   }).where(eq(documentTable.name, name)); | ||||
/**
 * Returns the content of the document `name`.
 *
 * The locally stored copy is returned when one exists, while the remote
 * version is fetched to refresh the database row in the background. Without a
 * stored copy the remote result is awaited and returned instead.
 *
 * @param name document name / path
 * @returns the document text, or `undefined` when it cannot be found
 */
export async function getDocument(name: string): Promise<string | undefined> {
  const documents = await db.select().from(documentTable).where(
    eq(documentTable.name, name),
  ).limit(1);

  // A single remote fetch serves both the background refresh and the
  // cache-miss path. The database statements are awaited on purpose:
  // NOTE(review): assuming `db` is a drizzle client, its query builders only
  // run once awaited, so a bare `db.delete(...)` would never execute.
  const refreshed = fetchDocument(name).then(async (content) => {
    if (content) {
      await updateDocument(name, content);
    } else {
      // Remote copy is gone (or empty): drop the stale local row.
      await db.delete(documentTable).where(eq(documentTable.name, name));
    }
    return content;
  });

  const cached = documents[0]?.content;
  if (cached) {
    // Refresh continues in the background; failures are only logged.
    refreshed.catch(log.error);
    return cached;
  }

  const text = await refreshed;
  if (!text) {
    return;
  }

  return text;
}
|  | ||||
/**
 * Builds the update that stores `content` on the row of `documentTable` whose
 * name equals `name`, and returns that query to the caller.
 */
export function updateDocument(name: string, content: string) {
  const matchesName = eq(documentTable.name, name);
  return db.update(documentTable).set({ content }).where(matchesName);
}
|  | ||||
| export function transformDocument(input: string, cb: (r: Root) => Root) { | ||||
|   const out = unified() | ||||
|     .use(remarkParse) | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| import OpenAI from "https://deno.land/x/openai@v4.52.0/mod.ts"; | ||||
| import OpenAI from "https://deno.land/x/openai@v4.69.0/mod.ts"; | ||||
| import { zodResponseFormat } from "https://deno.land/x/openai@v4.69.0/helpers/zod.ts"; | ||||
| import { OPENAI_API_KEY } from "@lib/env.ts"; | ||||
| import { hashString } from "@lib/helpers.ts"; | ||||
| import { createCache } from "@lib/cache.ts"; | ||||
| import recipeSchema from "@lib/recipeSchema.ts"; | ||||
|  | ||||
| const openAI = OPENAI_API_KEY && new OpenAI({ apiKey: OPENAI_API_KEY }); | ||||
|  | ||||
| @@ -208,3 +210,21 @@ export async function createTags(content: string) { | ||||
|  | ||||
|   return extractListFromResponse(res).map((v) => v.replaceAll(" ", "-")); | ||||
| } | ||||
|  | ||||
/**
 * Asks the OpenAI chat API to extract a structured recipe from markdown.
 *
 * @param content markdown text containing the recipe
 * @returns the first choice's parsed message validated against
 *   `recipeSchema`, or `undefined` when no OpenAI client is configured
 */
export async function extractRecipe(content: string) {
  if (!openAI) return;

  const responseFormat = zodResponseFormat(recipeSchema, "recipe-v2");

  const { choices } = await openAI.beta.chat.completions.parse({
    model: "gpt-4o-2024-08-06",
    temperature: 0.1,
    messages: [
      {
        role: "system",
        content: "Extract the recipe information from the provided markdown.",
      },
      { role: "user", content },
    ],
    response_format: responseFormat,
  });

  // Validate once more locally before handing the result to callers.
  const firstChoice = choices[0];
  return recipeSchema.parse(firstChoice.message.parsed);
}
|   | ||||
							
								
								
									
										35
									
								
								lib/parseIngredient.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								lib/parseIngredient.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| import { parseIngredient as _parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1"; | ||||
|  | ||||
/**
 * Parses one line of ingredient text (e.g. "2 EL Zucker") into the
 * `{ name, unit, quantity, note }` shape used by the recipe schema.
 *
 * Extra units of measure are registered for the abbreviations used in the
 * recipes (EL/TL, litre, Paket). Only the first parsed entry is used.
 *
 * @param text raw ingredient text
 * @returns the parsed ingredient; when the parser yields no entry (for
 *   example for blank text) the trimmed input is returned as the name with
 *   empty unit and quantity instead of throwing
 */
export function parseIngredient(text: string) {
  const [first] = _parseIngredient(text, {
    additionalUOMs: {
      tableSpoon: {
        short: "EL",
        plural: "Table Spoons",
        alternates: ["el", "EL", "Tbsp", "tbsp"],
      },
      teaSpoon: {
        short: "TL",
        plural: "Tea Spoon",
        alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"],
      },
      litre: {
        short: "L",
        plural: "liters",
        alternates: ["L", "l"],
      },
      paket: {
        short: "Paket",
        plural: "Pakets",
        alternates: ["Paket", "paket"],
      },
    },
  });

  // Guard against an empty result: indexing it blindly would throw.
  if (!first) {
    return {
      name: text.trim(),
      unit: "",
      quantity: "",
      note: "",
    };
  }

  return {
    name: first.description,
    unit: first.unitOfMeasure || "",
    quantity: first.quantity?.toString() || "",
    note: "",
  };
}
							
								
								
									
										55
									
								
								lib/playwright.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								lib/playwright.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| import { firefox } from "npm:playwright-extra"; | ||||
| import { createStreamResponse } from "@lib/helpers.ts"; | ||||
| import StealthPlugin from "npm:puppeteer-extra-plugin-stealth"; | ||||
|  | ||||
| const userAgentStrings = [ | ||||
|   "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.2227.0 Safari/537.36", | ||||
|   "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", | ||||
|   "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.3497.92 Safari/537.36", | ||||
|   "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", | ||||
| ]; | ||||
|  | ||||
| firefox.use(StealthPlugin()); | ||||
|  | ||||
/**
 * Loads `fetchUrl` in a Playwright-driven Firefox and returns the page HTML.
 *
 * Progress messages are pushed to `streamResponse`. Any error while loading
 * the page is logged and reported on the stream, and an empty string is
 * returned. The browser is closed in every case.
 *
 * @param fetchUrl address of the page to load
 * @param streamResponse stream that receives progress messages
 * @returns the page HTML, or "" when loading failed
 */
export async function fetchHtmlWithPlaywright(
  fetchUrl: string,
  streamResponse: ReturnType<typeof createStreamResponse>,
): Promise<string> {
  streamResponse.enqueue("booting up playwright");
  const browser = await firefox.launch();

  streamResponse.enqueue("fetching html");

  try {
    // Pick one of the predefined user agent strings at random.
    const randomIndex = Math.floor(Math.random() * userAgentStrings.length);
    const context = await browser.newContext({
      userAgent: userAgentStrings[randomIndex],
    });

    // Runs in every page of the context before the page's own scripts.
    await context.addInitScript(
      "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})",
    );

    const page = await context.newPage();

    // Resolve once the DOM is loaded rather than waiting for the full load.
    await page.goto(fetchUrl, {
      waitUntil: "domcontentloaded",
    });

    return await page.content();
  } catch (error) {
    streamResponse.enqueue("error fetching html");
    console.error(error);
    return "";
  } finally {
    await browser.close();
  }
}
							
								
								
									
										39
									
								
								lib/recipeSchema.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								lib/recipeSchema.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import { z } from "npm:zod"; | ||||
|  | ||||
| export const IngredientSchema = z.object({ | ||||
|   quantity: z.string().describe( | ||||
|     "e.g., '2', '1/2', or an empty string for 'to taste'", | ||||
|   ), | ||||
|   unit: z.string().describe('e.g., "g", "tbsp", "cup"'), | ||||
|   name: z.string().describe('e.g., "sugar", "flour"'), // | ||||
|   note: z.string().describe('optional, e.g., "sifted", "chopped finely"'), | ||||
| }); | ||||
| export type Ingredient = z.infer<typeof IngredientSchema>; | ||||
|  | ||||
| export const IngredientGroupSchema = z.object({ | ||||
|   name: z.string(), | ||||
|   items: z.array(IngredientSchema), | ||||
| }); | ||||
| export type IngredientGroup = z.infer<typeof IngredientGroupSchema>; | ||||
|  | ||||
| const recipeSchema = z.object({ | ||||
|   title: z.string().describe( | ||||
|     "Title of the Recipe, without the name of the website or author", | ||||
|   ), | ||||
|   image: z.string().describe("URL of the main image of the recipe"), | ||||
|   author: z.string().describe("author of the Recipe (optional)"), | ||||
|   description: z.string().describe("Optional, short description of the recipe"), | ||||
|   ingredients: z.array(z.union([IngredientSchema, IngredientGroupSchema])) | ||||
|     .describe("List of ingredients"), | ||||
|   instructions: z.array(z.string()).describe("List of instructions"), | ||||
|   servings: z.number().describe("Amount of Portions"), | ||||
|   prepTime: z.number().describe("Preparation time in minutes"), | ||||
|   cookTime: z.number().describe("Cooking time in minutes"), | ||||
|   totalTime: z.number().describe("Total time in minutes"), | ||||
|   tags: z.array(z.string()).describe( | ||||
|     "List of tags (e.g., ['vegan', 'dessert'])", | ||||
|   ), | ||||
|   notes: z.array(z.string()).describe("Optional notes about the recipe"), | ||||
| }); | ||||
|  | ||||
| export default recipeSchema; | ||||
| @@ -4,31 +4,22 @@ import { | ||||
|   getTextOfRange, | ||||
|   parseDocument, | ||||
| } from "@lib/documents.ts"; | ||||
| import { parse } from "yaml"; | ||||
| import { parseIngredient } from "https://esm.sh/parse-ingredient@1.0.1"; | ||||
| import { parse, stringify } from "yaml"; | ||||
| import { createCrud } from "@lib/crud.ts"; | ||||
| import { extractHashTags } from "@lib/string.ts"; | ||||
|  | ||||
| export type IngredientGroup = { | ||||
|   name: string; | ||||
|   ingredients: Ingredient[]; | ||||
| }; | ||||
|  | ||||
| export type Ingredient = { | ||||
|   type: string; | ||||
|   unit?: string; | ||||
|   amount?: string; | ||||
| }; | ||||
|  | ||||
| export type Ingredients = (Ingredient | IngredientGroup)[]; | ||||
| import { Ingredient, IngredientGroup } from "@lib/recipeSchema.ts"; | ||||
| import { fixRenderedMarkdown } from "@lib/helpers.ts"; | ||||
| import { parseIngredient } from "@lib/parseIngredient.ts"; | ||||
|  | ||||
| export type Recipe = { | ||||
|   type: "recipe"; | ||||
|   id: string; | ||||
|   name: string; | ||||
|   description?: string; | ||||
|   ingredients: Ingredients; | ||||
|   preparation?: string; | ||||
|   markdown?: string; | ||||
|   ingredients: (Ingredient | IngredientGroup)[]; | ||||
|   instructions?: string[]; | ||||
|   notes?: string[]; | ||||
|   tags: string[]; | ||||
|   meta?: { | ||||
|     time?: string; | ||||
| @@ -49,38 +40,8 @@ function parseIngredientItem(listItem: DocumentChild): Ingredient | undefined { | ||||
|  | ||||
|     const text = children.map((c) => getTextOfChild(c)).join(" ").trim(); | ||||
|  | ||||
|     const ing = parseIngredient(text, { | ||||
|       additionalUOMs: { | ||||
|         tableSpoon: { | ||||
|           short: "EL", | ||||
|           plural: "Table Spoons", | ||||
|           alternates: ["el", "EL", "Tbsp", "tbsp"], | ||||
|         }, | ||||
|         teaSpoon: { | ||||
|           short: "TL", | ||||
|           plural: "Tea Spoon", | ||||
|           alternates: ["tl", "TL", "Tsp", "tsp", "teaspoon"], | ||||
|         }, | ||||
|         litre: { | ||||
|           short: "L", | ||||
|           plural: "liters", | ||||
|           alternates: ["L", "l"], | ||||
|         }, | ||||
|         paket: { | ||||
|           short: "Paket", | ||||
|           plural: "Pakets", | ||||
|           alternates: ["Paket", "paket"], | ||||
|         }, | ||||
|       }, | ||||
|     }); | ||||
|  | ||||
|     return { | ||||
|       type: ing[0].description, | ||||
|       unit: ing[0].unitOfMeasure, | ||||
|       amount: ing[0].quantity, | ||||
|     }; | ||||
|     return parseIngredient(text); | ||||
|   } | ||||
|   return; | ||||
| } | ||||
|  | ||||
| const isIngredient = (item: Ingredient | undefined): item is Ingredient => { | ||||
| @@ -112,9 +73,10 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] { | ||||
|  | ||||
|       if (!nextChild || nextChild.type !== "list") continue; | ||||
|  | ||||
|       const name = getTextOfChild(child); | ||||
|       ingredients.push({ | ||||
|         name: getTextOfChild(child) || "", | ||||
|         ingredients: parseIngredientsList(nextChild), | ||||
|         name: name || "", | ||||
|         items: parseIngredientsList(nextChild), | ||||
|       }); | ||||
|       skip = true; | ||||
|       continue; | ||||
| @@ -128,6 +90,19 @@ function parseIngredients(children: DocumentChild[]): Recipe["ingredients"] { | ||||
|   return ingredients; | ||||
| } | ||||
|  | ||||
/**
 * Splits instruction text into its numbered steps.
 *
 * The text is cut at `seperator` (by default every newline that is followed
 * by "<number>."). Each chunk that starts with "<number>." contributes its
 * text without the numbering; other chunks, and steps with no text, are
 * dropped.
 *
 * @param content raw instruction text
 * @param seperator pattern used to cut the text into step candidates
 * @returns the step texts in order, trimmed, without their numbering
 */
function extractSteps(
  content: string,
  seperator: RegExp = /\n(?=\d+\.)/g,
): string[] {
  const steps: string[] = [];

  for (const chunk of content.split(seperator)) {
    // [\s\S]* instead of .* so a step that continues on following lines is
    // kept whole rather than cut off at its first line break.
    const match = chunk.match(/^\d+\.\s*([\s\S]*)/);
    if (!match) continue;

    const text = match[1].trim();
    if (text) steps.push(text);
  }

  return steps;
}
|  | ||||
| export function parseRecipe(original: string, id: string): Recipe { | ||||
|   const doc = parseDocument(original); | ||||
|  | ||||
| @@ -140,8 +115,8 @@ export function parseRecipe(original: string, id: string): Recipe { | ||||
|     if (child.type === "yaml") { | ||||
|       try { | ||||
|         meta = parse(child.value) as Recipe["meta"]; | ||||
|       } catch (_) { | ||||
|         // console.log("Error parsing YAML", err); | ||||
|       } catch (err) { | ||||
|         console.log("Error parsing YAML", err); | ||||
|       } | ||||
|       continue; | ||||
|     } | ||||
| @@ -168,7 +143,14 @@ export function parseRecipe(original: string, id: string): Recipe { | ||||
|  | ||||
|   const ingredients = parseIngredients(groups[1]); | ||||
|  | ||||
|   const preparation = getTextOfRange(groups[2], original); | ||||
|   const instructionText = getTextOfRange(groups[2], original); | ||||
|   let instructions = extractSteps(instructionText || ""); | ||||
|   if (instructions.length <= 1) { | ||||
|     const d = extractSteps(instructionText || "", /\n/g); | ||||
|     if (d.length > instructions.length) { | ||||
|       instructions = d; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   const tags = extractHashTags(description || ""); | ||||
|   if (description) { | ||||
| @@ -183,15 +165,88 @@ export function parseRecipe(original: string, id: string): Recipe { | ||||
|     meta, | ||||
|     name, | ||||
|     tags, | ||||
|     markdown: original, | ||||
|     notes: getTextOfRange(groups[3], original)?.split("\n"), | ||||
|     description, | ||||
|     ingredients, | ||||
|     preparation, | ||||
|     instructions, | ||||
|   }; | ||||
| } | ||||
|  | ||||
/**
 * Returns a shallow copy of `obj` that omits every property whose value is
 * exactly `undefined`. Other falsy values (`null`, `0`, `""`, `false`) are kept.
 */
function filterUndefinedFromObject<T extends { [key: string]: unknown }>(
  obj: T,
) {
  const definedEntries: [string, unknown][] = [];
  for (const entry of Object.entries(obj)) {
    if (entry[1] === undefined) continue;
    definedEntries.push(entry);
  }
  return Object.fromEntries(definedEntries);
}
|  | ||||
/**
 * Serialises a recipe into its markdown document form.
 *
 * Layout of the result: YAML front matter built from `recipe.meta`, a
 * `# name` heading, the cover image, the tags as hashtags, the description,
 * and then the ingredients, the numbered instructions and the notes, each
 * separated by `---` rules. The assembled text is passed through
 * `fixRenderedMarkdown` before it is returned.
 *
 * @param recipe Recipe to render.
 * @returns The markdown text of the recipe.
 */
export function renderRecipe(recipe: Recipe) {
  const meta = filterUndefinedFromObject(recipe.meta || {});

  // thumbnail and average are never written to the front matter
  delete meta.thumbnail;
  delete meta.average;

  // Markdown image for the cover; empty when the recipe has no image.
  const recipeImage = meta.image ? `![](${meta.image})` : "";

  // One list line per ingredient. The amount is rendered in bold as
  // "<quantity><unit>"; a quantity without a unit (e.g. "2" eggs) is kept
  // instead of being silently dropped.
  const formatIngredient = (
    ing: { name: string; quantity?: string | null; unit?: string | null },
  ) => {
    const quantity = ing.quantity?.trim();
    if (!quantity) return `- ${ing.name}`;
    return `- **${quantity}${ing.unit?.trim() || ""}** ${ing.name}`;
  };

  // Ingredient groups get an italic title line followed by their items;
  // standalone ingredients are rendered as plain list lines.
  const ingredients = recipe.ingredients
    .map((item) => {
      if ("items" in item) {
        return `\n*${item.name}*\n${item.items.map(formatIngredient).join("\n")}`;
      }
      return formatIngredient(item);
    })
    .join("\n");

  // Instructions become a numbered list starting at 1.
  const instructions = recipe.instructions
    ? recipe.instructions.map((step, i) => `${i + 1}. ${step}`).join("\n")
    : "";

  // Tags are written as hashtags; spaces inside a tag are replaced by dashes.
  const tags = recipe.tags.map((t) => `#${t.replaceAll(" ", "-")}`).join(" ");

  // An empty front matter block is still emitted when there is no meta data.
  const frontMatter = Object.keys(meta).length
    ? `---
${stringify(meta)}
---`
    : `---
---`;

  return fixRenderedMarkdown(`${frontMatter}
# ${recipe.name}
${recipeImage}
${tags}
${recipe.description || ""}

---

${ingredients ? `## Ingredients\n\n${ingredients}\n\n---\n` : ""}
${instructions ? `${instructions}\n\n---` : ""}
${recipe.notes?.length ? `\n${recipe.notes.join("\n")}` : ""}
`);
}
|  | ||||
| const crud = createCrud<Recipe>({ | ||||
|   prefix: `Recipes/`, | ||||
|   parse: parseRecipe, | ||||
|   render: renderRecipe, | ||||
|   hasThumbnails: true, | ||||
| }); | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,10 @@ export function formatDate(date: Date): string { | ||||
| } | ||||
|  | ||||
/**
 * Derives a file-system friendly name from an arbitrary string.
 *
 * The input is lower-cased, every space becomes an underscore, and every
 * character that is not an ASCII letter, digit, "_", "." or "-" is removed
 * (this includes ":" and all non-ASCII characters).
 *
 * @param inputString Text to convert, e.g. a title.
 * @returns The sanitised name; may be empty.
 */
export function safeFileName(inputString: string): string {
  const underscored = inputString.toLowerCase().replace(/ /g, "_");
  return underscored.replace(/[^\w.-]/g, "");
}
|  | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import { Head } from "$fresh/runtime.ts"; | ||||
| import Image from "@components/Image.tsx"; | ||||
| import { MainLayout } from "@components/layouts/main.tsx"; | ||||
|  | ||||
| export default function Error404() { | ||||
|   return ( | ||||
| @@ -7,22 +7,17 @@ export default function Error404() { | ||||
|       <Head> | ||||
|         <title>404 - Page not found</title> | ||||
|       </Head> | ||||
|       <div class="px-4 py-8 mx-auto bg-[#86efac]"> | ||||
|         <div class="max-w-screen-md mx-auto flex flex-col items-center justify-center"> | ||||
|           <Image | ||||
|             class="my-6" | ||||
|             src="/logo.svg" | ||||
|             width="128" | ||||
|             height="128" | ||||
|             alt="the fresh logo: a sliced lemon dripping with juice" | ||||
|           /> | ||||
|           <h1 class="text-4xl font-bold">404 - Page not found</h1> | ||||
|           <p class="my-4"> | ||||
|             The page you were looking for doesn't exist. | ||||
|           </p> | ||||
|           <a href="/" class="underline">Go back home</a> | ||||
|       <MainLayout> | ||||
|         <div class="px-8 text-white mt-10"> | ||||
|           <div class="max-w-screen-md mx-auto flex flex-col items-center justify-center"> | ||||
|             <h1 class="text-4xl font-bold">404 - Page not found</h1> | ||||
|             <p class="my-4"> | ||||
|               The page you were looking for doesn't exist. | ||||
|             </p> | ||||
|             <a href="/" class="underline">Go back home</a> | ||||
|           </div> | ||||
|         </div> | ||||
|       </div> | ||||
|       </MainLayout> | ||||
|     </> | ||||
|   ); | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| import { Handlers } from "$fresh/server.ts"; | ||||
| import { Readability } from "https://cdn.skypack.dev/@mozilla/readability"; | ||||
| import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts"; | ||||
| import { DOMParser } from "domparser"; | ||||
| import { AccessDeniedError, BadRequestError } from "@lib/errors.ts"; | ||||
| import { createStreamResponse, isValidUrl } from "@lib/helpers.ts"; | ||||
| import * as openai from "@lib/openai.ts"; | ||||
|   | ||||
							
								
								
									
										12
									
								
								routes/api/cache.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								routes/api/cache.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,12 @@ | ||||
| import { Handlers } from "$fresh/server.ts"; | ||||
| import { documentTable } from "@lib/db/schema.ts"; | ||||
| import { db } from "@lib/db/sqlite.ts"; | ||||
| import { json } from "@lib/helpers.ts"; | ||||
|  | ||||
/**
 * Route handlers for the cache endpoint.
 *
 * NOTE(review): no session check is visible here, unlike other mutating
 * routes — confirm that unauthenticated access is intended.
 */
export const handler: Handlers = {
  // DELETE: removes every row of `documentTable` and answers `{ status: "ok" }`.
  DELETE: async () => {
    await db.delete(documentTable).run();
    return json({ status: "ok" });
  },
};
|  | ||||
| @@ -71,7 +71,6 @@ const POST = async ( | ||||
|   if (posterPath && !movie.meta?.image) { | ||||
|     const poster = await tmdb.getMoviePoster(posterPath); | ||||
|     const extension = fileExtension(posterPath); | ||||
|  | ||||
|     finalPath = `Media/movies/images/${safeFileName(name)}_cover.${extension}`; | ||||
|     await createDocument(finalPath, poster); | ||||
|     movie.meta = movie.meta || {}; | ||||
|   | ||||
							
								
								
									
										264
									
								
								routes/api/recipes/create/index.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										264
									
								
								routes/api/recipes/create/index.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,264 @@ | ||||
| import { Handlers } from "$fresh/server.ts"; | ||||
| import { Readability } from "https://cdn.skypack.dev/@mozilla/readability"; | ||||
| import { DOMParser } from "domparser"; | ||||
| import { AccessDeniedError, BadRequestError } from "@lib/errors.ts"; | ||||
| import { createStreamResponse, isValidUrl } from "@lib/helpers.ts"; | ||||
| import * as openai from "@lib/openai.ts"; | ||||
| import tds from "https://cdn.skypack.dev/turndown@7.2.0"; | ||||
| import { createLogger } from "@lib/log.ts"; | ||||
| import { createRecipe, Recipe } from "@lib/resource/recipes.ts"; | ||||
| import recipeSchema from "@lib/recipeSchema.ts"; | ||||
| import { fileExtension } from "https://deno.land/x/file_extension@v2.1.0/mod.ts"; | ||||
| import { safeFileName } from "@lib/string.ts"; | ||||
| import { createDocument } from "@lib/documents.ts"; | ||||
| import { parseJsonLdToRecipeSchema } from "./parseJsonLd.ts"; | ||||
| import z from "npm:zod"; | ||||
| import { fetchHtmlWithPlaywright } from "@lib/playwright.ts"; | ||||
|  | ||||
| const parser = new DOMParser(); | ||||
|  | ||||
| const log = createLogger("api/article"); | ||||
|  | ||||
/**
 * Resolves a reference found in a page (e.g. an image `src`) against the URL
 * of that page, following the standard URL resolution rules.
 *
 * Handles absolute URLs (returned normalised), root-relative paths
 * ("/img/a.png"), document-relative paths ("img/a.png", "../a.png") and
 * protocol-relative URLs ("//cdn.example.org/a.png").
 *
 * @param url URL of the page the reference was found on.
 * @param src The reference as written in the page.
 * @returns The absolute URL, or `src` unchanged when it cannot be resolved.
 */
function makeUrlAbsolute(url: URL, src: string) {
  try {
    return new URL(src.trim(), url).href;
  } catch (_err) {
    // Not resolvable (e.g. malformed host); hand the raw value back so the
    // caller can decide what to do with it.
    return src;
  }
}
|  | ||||
/**
 * Fallback recipe extraction: reduces the page to its readable content,
 * converts that content to markdown and lets `openai.extractRecipe` pull a
 * structured recipe out of the markdown.
 *
 * @param url            URL of the page; used to make links absolute.
 * @param document       Parsed DOM of the page, or null when parsing failed.
 * @param streamResponse Progress channel towards the client.
 * @returns The result of `openai.extractRecipe`, or `undefined` when the page
 *          has no document or no readable content.
 */
async function extractUsingAI(
  url: URL,
  document: Parameters<typeof Readability>[0] | null,
  streamResponse: ReturnType<typeof createStreamResponse>,
) {
  if (!document) return undefined;

  const readable = new Readability(document);

  // parse() yields null when no article content could be identified
  const result = readable.parse();
  if (!result?.content) return undefined;

  const service = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  // Images: drop inline data URIs and images without a source, emit all
  // others as markdown images with an absolute URL.
  service.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: HTMLImageElement) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      if (!src || src.startsWith("data:image")) return "";

      return `![${alt}](${makeUrlAbsolute(url, src)})`;
    },
  });

  // Links: resolve every href against the page URL so relative, root-relative
  // and in-page ("#…") links stay valid outside of the original page.
  service.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: HTMLAnchorElement) {
      const href = node.getAttribute("href");
      if (!href) return content;

      // In-page anchors with a label shorter than two characters are dropped
      // (presumably footnote / back-reference markers).
      if (href.startsWith("#") && content.length < 2) return "";

      let target = href;
      try {
        target = new URL(href, url).href;
      } catch (_err) {
        // keep the href as written when it cannot be resolved
      }
      return `[${content}](${target})`;
    },
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  const markdown = service.turndown(cleanDocument);

  streamResponse.enqueue("extracting recipe with openai");
  console.log("------- MARKDOWN ------");
  console.log(markdown);
  console.log("-----------------------");

  const recipe = await openai.extractRecipe(markdown);
  console.log("------- EXTRACTED ------");
  console.log(JSON.stringify(recipe, null, 2));
  console.log("-----------------------");

  return recipe;
}
|  | ||||
/**
 * Returns the `src` of the image with the largest declared size
 * (width + height attributes), ignoring images without a source and inline
 * data URIs. Missing or non-numeric dimensions count as 0; on a tie the
 * image that appears first wins.
 */
function findLargestImageSrc(images: HTMLImageElement[]): string | undefined {
  let bestSrc: string | undefined;
  let bestSize = -1;
  for (const img of images) {
    const src = img.getAttribute("src");
    if (!src || src.startsWith("data:")) continue;
    const size = (Number(img.getAttribute("width")) || 0) +
      (Number(img.getAttribute("height")) || 0);
    if (size > bestSize) {
      bestSize = size;
      bestSrc = src;
    }
  }
  return bestSrc;
}

/**
 * Downloads a page, extracts a recipe from it and stores it.
 *
 * Steps: fetch the HTML via Playwright, try every JSON-LD script on the page
 * with `parseJsonLdToRecipeSchema`, fall back to `extractUsingAI`, pick a
 * cover image (falling back to the largest image on the page), download the
 * cover into the document store and finally persist the recipe with
 * `createRecipe`. Progress messages are pushed to `streamResponse`; the last
 * message is `"id: <recipe id>"`.
 *
 * When no recipe can be extracted, "failed to parse recipe" is streamed, the
 * stream is cancelled and nothing is stored. A failing cover download is
 * logged and does not abort the import.
 *
 * @param fetchUrl       Address of the page to import.
 * @param streamResponse Progress channel towards the client.
 */
async function processCreateRecipeFromUrl(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  log.info("create article from url", { url: fetchUrl });
  const url = new URL(fetchUrl);

  streamResponse.enqueue("downloading article");

  const html = await fetchHtmlWithPlaywright(fetchUrl, streamResponse);

  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");

  const title = document?.querySelector("title")?.innerText;

  // The document may be null when parsing failed; treat that as "no nodes".
  const imageNodes = document?.querySelectorAll("img");
  const images = imageNodes
    ? Array.from(imageNodes as unknown as HTMLImageElement[])
    : [];

  const metaAuthor =
    document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document?.querySelector('meta[name="author"]')?.getAttribute("content");

  const jsonLdNodes = document?.querySelectorAll(
    "script[type='application/ld+json']",
  );
  const jsonLds = jsonLdNodes
    ? Array.from(jsonLdNodes as unknown as HTMLScriptElement[])
    : [];

  // Structured data first: the first script that parses into a recipe wins.
  let recipe: z.infer<typeof recipeSchema> | undefined = undefined;
  for (const jsonLd of jsonLds) {
    console.log({ content: jsonLd.textContent });
    recipe = parseJsonLdToRecipeSchema(jsonLd.textContent || "");
    if (recipe) break;
  }

  if (!recipe) {
    recipe = await extractUsingAI(url, document, streamResponse);
  }

  if (!recipe) {
    console.error("Failed to parse recipe");
    streamResponse.enqueue("failed to parse recipe");
    streamResponse.cancel();
    return;
  }

  const name = recipe.title || title || "";
  const id = name.replaceAll(" ", "-");

  // No image in the extracted data: use the largest image of the page, if any.
  if (!recipe.image) {
    const fallbackSrc = findLargestImageSrc(images);
    if (fallbackSrc) {
      recipe.image = makeUrlAbsolute(url, fallbackSrc);
    }
  }

  const newRecipe: Recipe = {
    type: "recipe",
    id,
    name,
    description: recipe.description,
    ingredients: recipe.ingredients || [],
    instructions: recipe.instructions || [],
    notes: recipe.notes,
    tags: recipe.tags || [],
    meta: {
      image: recipe.image,
      time: recipe.totalTime
        ? `${recipe.totalTime.toString()} minutes`
        : undefined,
      link: fetchUrl,
      portion: recipe.servings,
      // an empty author meta tag must not hide the author of the recipe data
      author: metaAuthor || recipe.author,
    },
  };

  if (newRecipe.meta?.image) {
    const src = makeUrlAbsolute(url, newRecipe.meta.image);
    if (src?.length > 5) {
      streamResponse.enqueue("downloading image");
      try {
        // new URL() is inside the try: an unparsable image URL must only skip
        // the cover, not abort the whole import.
        const extension = fileExtension(new URL(src).pathname);
        const finalPath = `Media/articles/images/${
          safeFileName(id)
        }_cover.${extension}`;
        const res = await fetch(src);
        if (!res.ok) {
          // do not store an HTTP error page as the cover image
          throw new Error(`image request failed with status ${res.status}`);
        }
        streamResponse.enqueue("saving image");
        const buffer = await res.arrayBuffer();
        await createDocument(finalPath, buffer);
        newRecipe.meta.image = finalPath;
      } catch (err) {
        console.log("Failed to save image", err);
      }
    }
  }

  streamResponse.enqueue("finished processing, creating file");

  console.log("------- CREATING ------");
  console.log(JSON.stringify(recipe, null, 2));
  console.log("-----------------------");

  await createRecipe(newRecipe.id, newRecipe);

  streamResponse.enqueue("id: " + newRecipe.id);
}
|  | ||||
/**
 * GET ?url=<page>: imports the recipe found at the given page.
 *
 * Requires a session (otherwise `AccessDeniedError`) and a valid `url` query
 * parameter (otherwise `BadRequestError`). The import runs in the background;
 * the returned response streams its progress messages and is closed once the
 * import has finished or failed.
 */
export const handler: Handlers = {
  GET(req, ctx) {
    if (!ctx.state.session) {
      throw new AccessDeniedError();
    }

    const fetchUrl = new URL(req.url).searchParams.get("url");
    if (!fetchUrl || !isValidUrl(fetchUrl)) {
      throw new BadRequestError();
    }

    const streamResponse = createStreamResponse();

    // Fire and forget: the response is returned right away while the import
    // keeps writing into the stream.
    void (async () => {
      try {
        const article = await processCreateRecipeFromUrl({
          fetchUrl,
          streamResponse,
        });
        log.debug("created article from link", { article });
      } catch (err) {
        log.error(err);
      } finally {
        streamResponse.cancel();
      }
    })();

    return streamResponse.response;
  },
};
							
								
								
									
										103
									
								
								routes/api/recipes/create/parseJsonLd.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								routes/api/recipes/create/parseJsonLd.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| import recipeSchema from "@lib/recipeSchema.ts"; | ||||
| import { parseIngredient } from "@lib/parseIngredient.ts"; | ||||
|  | ||||
type JsonLdObject = Record<string, unknown>;

/** True for non-null, non-array objects. */
function isJsonLdObject(value: unknown): value is JsonLdObject {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when the node's `@type` is "Recipe" or a type list containing it. */
function isRecipeNode(node: JsonLdObject): boolean {
  const type = node["@type"];
  return Array.isArray(type) ? type.includes("Recipe") : type === "Recipe";
}

/**
 * Finds the first Recipe node in a JSON-LD value. Looks through top-level
 * arrays, `mainEntity` wrappers (checked before the node itself) and
 * `@graph` containers.
 */
function findRecipeNode(data: unknown): JsonLdObject | undefined {
  if (Array.isArray(data)) {
    for (const entry of data) {
      const found = findRecipeNode(entry);
      if (found) return found;
    }
    return undefined;
  }
  if (!isJsonLdObject(data)) return undefined;
  return findRecipeNode(data["mainEntity"]) ??
    (isRecipeNode(data) ? data : undefined) ??
    findRecipeNode(data["@graph"]);
}

/** Collects image URLs from strings, `{ url }` objects and arrays of both. */
function collectImageUrls(value: unknown): string[] {
  if (typeof value === "string") return value ? [value] : [];
  if (Array.isArray(value)) return value.flatMap((v) => collectImageUrls(v));
  if (isJsonLdObject(value) && typeof value.url === "string" && value.url) {
    return [value.url];
  }
  return [];
}

/**
 * Flattens instruction entries into plain texts: strings are taken as-is,
 * objects contribute their `text`, or otherwise the entries of their
 * `itemListElement` (sections of steps). Blank texts are dropped.
 */
function collectInstructionTexts(value: unknown): string[] {
  if (typeof value === "string") return value.trim() ? [value] : [];
  if (Array.isArray(value)) {
    return value.flatMap((v) => collectInstructionTexts(v));
  }
  if (isJsonLdObject(value)) {
    if (typeof value.text === "string") {
      return collectInstructionTexts(value.text);
    }
    return collectInstructionTexts(value.itemListElement);
  }
  return [];
}

/** Author as text: a plain string, an object's `name`, or a list of those. */
function authorName(author: unknown): string {
  if (typeof author === "string") return author;
  if (Array.isArray(author)) {
    return author.map((a) => authorName(a)).filter(Boolean).join(", ");
  }
  if (isJsonLdObject(author) && typeof author.name === "string") {
    return author.name;
  }
  return "";
}

/**
 * Converts the content of one `application/ld+json` script into a recipe
 * that satisfies `recipeSchema`.
 *
 * @param jsonLdContent Raw text content of the script element.
 * @returns The validated recipe, or `undefined` when the content is not
 *          valid JSON, contains no Recipe node, or fails schema validation
 *          (JSON and validation errors are logged).
 */
export function parseJsonLdToRecipeSchema(jsonLdContent: string) {
  try {
    const root: unknown = JSON.parse(jsonLdContent);

    const data = findRecipeNode(root);
    if (!data) {
      return;
    }

    // An image declared on the wrapping node takes precedence over the
    // image of the recipe node itself.
    const rootImage = isJsonLdObject(root) ? root.image : undefined;
    const imageUrls = [
      ...collectImageUrls(rootImage),
      ...collectImageUrls(data.image),
    ];

    // Map and parse ingredients into the new schema. The arrow keeps map()'s
    // index/array arguments away from parseIngredient.
    const ingredients = Array.isArray(data.recipeIngredient)
      ? data.recipeIngredient
        .filter((line): line is string => typeof line === "string")
        .map((line) => parseIngredient(line))
      : [];

    // A single string of instructions is split into one step per line.
    const instructions = typeof data.recipeInstructions === "string"
      ? data.recipeInstructions
        .split(/\r?\n+/)
        .map((line) => line.trim())
        .filter((line) => line !== "")
      : collectInstructionTexts(data.recipeInstructions);

    const asString = (value: unknown) =>
      typeof value === "string" ? value : undefined;

    // Parse servings
    const servings = parseServings(data.recipeYield);

    // Parse times
    const prepTime = parseDuration(asString(data.prepTime));
    const cookTime = parseDuration(asString(data.cookTime));
    const totalTime = parseDuration(asString(data.totalTime));

    // Extract tags: either a list or a comma separated string
    const rawTags: unknown[] = Array.isArray(data.keywords)
      ? data.keywords
      : typeof data.keywords === "string"
      ? data.keywords.split(",")
      : [];
    const tags = rawTags
      .filter((tag): tag is string => typeof tag === "string")
      .map((tag) => tag.trim())
      .filter((tag) => tag !== "");

    // Build the recipe object
    const recipe = {
      title: data.name || "Unnamed Recipe",
      image: pickImage(imageUrls.length ? imageUrls : ""),
      author: authorName(data.author),
      description: data.description || "",
      ingredients,
      instructions,
      servings,
      prepTime,
      cookTime,
      totalTime,
      tags,
      notes: data.notes || [],
    };

    // Validate against the schema
    return recipeSchema.parse(recipe);
  } catch (error) {
    console.error("Invalid JSON-LD content or parsing error:", error);
    return undefined;
  }
}
|  | ||||
/**
 * Picks one image URL out of an image value.
 *
 * Accepts a URL string, an object carrying the URL in its `url` property,
 * or an array of either; for arrays the first entry that yields a non-empty
 * URL is used.
 *
 * @param images The image value.
 * @returns The chosen URL, or "" when none is present.
 */
function pickImage(images: unknown): string {
  if (Array.isArray(images)) {
    for (const entry of images) {
      const picked = pickImage(entry);
      if (picked) return picked;
    }
    return "";
  }
  if (typeof images === "string") return images;
  if (typeof images === "object" && images !== null) {
    const url = (images as { url?: unknown }).url;
    return typeof url === "string" ? url : "";
  }
  return "";
}
|  | ||||
/**
 * Extracts the number of servings from a yield value.
 *
 * Accepts a number, a string containing a number ("4 servings"), or an
 * array of such values (the first usable entry wins).
 *
 * @param servingsData The raw yield value.
 * @returns The positive serving count, or 1 when none can be determined.
 */
function parseServings(servingsData: unknown): number {
  const candidates: unknown[] = Array.isArray(servingsData)
    ? servingsData
    : [servingsData];

  for (const candidate of candidates) {
    if (typeof candidate === "number") {
      if (Number.isFinite(candidate) && candidate > 0) return candidate;
      continue;
    }
    if (typeof candidate === "string") {
      const match = candidate.match(/\d+/);
      const parsed = match ? parseInt(match[0], 10) : 0;
      if (parsed > 0) return parsed;
    }
  }
  return 1;
}
|  | ||||
/**
 * Converts an ISO 8601 duration ("PT30M", "PT1H30M", "P0DT1H15M", "PT90S")
 * into whole minutes.
 *
 * Weeks, days, hours, minutes and seconds are honoured and the total is
 * rounded to the nearest minute. Year and month designators are accepted but
 * contribute nothing, because they have no fixed length.
 *
 * @param duration The duration text, if any.
 * @returns The duration in minutes; 0 for missing or unparsable input.
 */
function parseDuration(duration: string | undefined): number {
  if (typeof duration !== "string") return 0;

  const n = "\\d+(?:[.,]\\d+)?";
  const pattern = new RegExp(
    `^P(?:${n}Y)?(?:${n}M)?(?:(${n})W)?(?:(${n})D)?` +
      `(?:T(?:(${n})H)?(?:(${n})M)?(?:(${n})S)?)?$`,
    "i",
  );
  const match = duration.trim().match(pattern);
  if (!match) return 0;

  const toNumber = (part: string | undefined) =>
    part ? parseFloat(part.replace(",", ".")) : 0;

  const [, weeks, days, hours, minutes, seconds] = match;
  const total = toNumber(weeks) * 7 * 24 * 60 +
    toNumber(days) * 24 * 60 +
    toNumber(hours) * 60 +
    toNumber(minutes) +
    toNumber(seconds) / 60;

  return Math.round(total);
}
| @@ -2,20 +2,65 @@ import { Handlers, PageProps } from "$fresh/server.ts"; | ||||
| import { IngredientsList } from "@islands/IngredientsList.tsx"; | ||||
| import { MainLayout } from "@components/layouts/main.tsx"; | ||||
| import Counter from "@islands/Counter.tsx"; | ||||
| import { useSignal } from "@preact/signals"; | ||||
| import { Signal, useSignal } from "@preact/signals"; | ||||
| import { getRecipe, Recipe } from "@lib/resource/recipes.ts"; | ||||
| import { RedirectSearchHandler } from "@islands/Search.tsx"; | ||||
| import { KMenu } from "@islands/KMenu.tsx"; | ||||
| import PageHero from "@components/PageHero.tsx"; | ||||
| import { Star } from "@components/Stars.tsx"; | ||||
| import { renderMarkdown } from "@lib/documents.ts"; | ||||
|  | ||||
/**
 * Page handler for a single recipe.
 *
 * GET looks the recipe up by the `name` route parameter and renders the page
 * with the recipe and the current session. A missing recipe, or a lookup that
 * throws, results in the not-found page.
 */
export const handler: Handlers<{ recipe: Recipe; session: unknown } | null> = {
  async GET(_, ctx) {
    let recipe: Recipe | null | undefined;
    try {
      recipe = await getRecipe(ctx.params.name);
    } catch (_e) {
      // a failing lookup is treated like a missing recipe
      return ctx.renderNotFound();
    }

    if (!recipe) {
      return ctx.renderNotFound();
    }
    return ctx.render({ recipe, session: ctx.state.session });
  },
};
|  | ||||
/**
 * Tells whether a recipe has everything the structured view needs: at least
 * one ingredient entry, at least one instruction and a non-empty name.
 *
 * @param recipe The recipe to check; may be null.
 * @returns `true` when all three are present, otherwise `false`.
 */
function isValidRecipe(recipe: Recipe | null): boolean {
  if (!recipe) return false;
  return Boolean(
    recipe.ingredients?.length &&
      recipe.instructions?.length &&
      recipe.name?.length,
  );
}
|  | ||||
/**
 * Structured recipe view: an "Ingredients" heading with the portion counter,
 * the ingredient list, and the instructions as an ordered list.
 *
 * Each instruction is run through `renderMarkdown` and injected as HTML.
 */
function ValidRecipe(
  props: { recipe: Recipe; amount: Signal<number>; portion: number },
) {
  const { recipe, amount, portion } = props;

  // One <li> per instruction, rendered from markdown.
  const steps = recipe.instructions &&
    recipe.instructions.map((instruction) => (
      <li
        dangerouslySetInnerHTML={{
          __html: renderMarkdown(instruction),
        }}
      />
    ));

  return (
    <>
      <div class="flex items-center gap-8">
        <h3 class="text-3xl my-5">Ingredients</h3>
        {portion && <Counter count={amount} />}
      </div>
      <IngredientsList
        ingredients={recipe.ingredients}
        amount={amount}
        portion={portion}
      />
      <h3 class="text-3xl my-5">Preparation</h3>
      <ol class="list-decimal grid gap-4">
        {steps}
      </ol>
    </>
  );
}
|  | ||||
| export default function Greet( | ||||
|   props: PageProps<{ recipe: Recipe; session: Record<string, string> }>, | ||||
| ) { | ||||
| @@ -47,7 +92,9 @@ export default function Greet( | ||||
|           )} | ||||
|         </PageHero.Header> | ||||
|         <PageHero.Footer> | ||||
|           <PageHero.Title>{recipe.name}</PageHero.Title> | ||||
|           <PageHero.Title link={recipe.meta?.link}> | ||||
|             {recipe.name} | ||||
|           </PageHero.Title> | ||||
|           <PageHero.Subline | ||||
|             entries={subline} | ||||
|           > | ||||
| @@ -55,23 +102,23 @@ export default function Greet( | ||||
|           </PageHero.Subline> | ||||
|         </PageHero.Footer> | ||||
|       </PageHero> | ||||
|  | ||||
|       <div class="px-8 text-white mt-10"> | ||||
|         <div class="flex items-center gap-8"> | ||||
|           <h3 class="text-3xl my-5">Ingredients</h3> | ||||
|           {portion && <Counter count={amount} />} | ||||
|         </div> | ||||
|         <IngredientsList | ||||
|           ingredients={recipe.ingredients} | ||||
|           amount={amount} | ||||
|           portion={portion} | ||||
|         /> | ||||
|         <h3 class="text-3xl my-5">Preparation</h3> | ||||
|         <pre | ||||
|           class="whitespace-break-spaces" | ||||
|           dangerouslySetInnerHTML={{ __html: recipe.preparation || "" }} | ||||
|         > | ||||
|           {recipe.preparation} | ||||
|         </pre> | ||||
|         {isValidRecipe(recipe) | ||||
|           ? ( | ||||
|             <ValidRecipe | ||||
|               recipe={recipe} | ||||
|               amount={amount} | ||||
|               portion={portion || 1} | ||||
|             /> | ||||
|           ) | ||||
|           : ( | ||||
|             <div | ||||
|               dangerouslySetInnerHTML={{ | ||||
|                 __html: renderMarkdown(recipe?.markdown || ""), | ||||
|               }} | ||||
|             /> | ||||
|           )} | ||||
|       </div> | ||||
|     </MainLayout> | ||||
|   ); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user