feat: store image metadata in sqlite and images on disk

2025-01-05 21:27:31 +01:00
parent bf7d88a588
commit 20a2781214
18 changed files with 641 additions and 379 deletions
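At a high level, raw image bytes now live on disk under `DATA_DIR/images`, while per-URL metadata (mime type, thumbhash, average color) lives in the sqlite `image` table. A minimal consumer sketch, assuming a Fresh-style proxy handler and `url`/`width`/`height` query parameters that are not part of this commit:

import { getImageContent } from "@lib/cache/image.ts";

// Hypothetical proxy endpoint: fetch (or reuse) the cached image for ?url=...,
// resized to the optional ?width=/?height= bounds, and serve it with the
// stored mime type.
export async function handler(req: Request): Promise<Response> {
  const params = new URL(req.url).searchParams;
  const url = params.get("url");
  if (!url) return new Response("missing url", { status: 400 });

  const width = Number(params.get("width")) || undefined;
  const height = Number(params.get("height")) || undefined;

  const { content, mimeType } = await getImageContent(url, { width, height });
  return new Response(content, { headers: { "Content-Type": mimeType } });
}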

lib/cache/image.ts

@@ -1,183 +1,326 @@
-import { hash, isLocalImage, rgbToHex } from "@lib/string.ts";
-import * as cache from "@lib/cache/cache.ts";
-import {
-  ImageMagick,
-  MagickGeometry,
-} from "https://deno.land/x/imagemagick_deno@0.0.25/mod.ts";
+import { rgbToHex } from "@lib/string.ts";
 import { createLogger } from "@lib/log.ts";
 import { generateThumbhash } from "@lib/thumbhash.ts";
-import { SILVERBULLET_SERVER } from "@lib/env.ts";
+import { parseMediaType } from "https://deno.land/std@0.224.0/media_types/parse_media_type.ts";
+import path from "node:path";
+import { ensureDir } from "https://deno.land/std@0.216.0/fs/mod.ts";
+import { DATA_DIR } from "@lib/env.ts";
+import { db } from "@lib/sqlite/sqlite.ts";
+import { imageTable } from "@lib/sqlite/schema.ts";
+import { eq } from "drizzle-orm";
+import sharp from "npm:sharp@next";
 
-type ImageCacheOptionsBasic = {
-  url: string;
-  mediaType?: string;
-};
-
-interface ImageCacheOptionsDimensions extends ImageCacheOptionsBasic {
-  width: number;
-  height: number;
-}
-
-interface ImageCacheOptionsSuffix extends ImageCacheOptionsBasic {
-  suffix: string;
-}
-
-type ImageCacheOptions = ImageCacheOptionsDimensions | ImageCacheOptionsSuffix;
-
-const CACHE_KEY = "images";
 const log = createLogger("cache/image");
 
-function getCacheKey(
-  opts: ImageCacheOptions,
-) {
-  const isLocal = isLocalImage(opts.url);
-  const url = new URL(
-    isLocal ? `${SILVERBULLET_SERVER}/${opts.url}` : opts.url,
-  );
-  const _suffix = "suffix" in opts
-    ? opts.suffix
-    : `${opts.width}:${opts.height}`;
-  const cacheId = `${CACHE_KEY}:${url.hostname}:${
-    url.pathname.replaceAll("/", ":")
-  }:${_suffix}`
-    .replace(
-      "::",
-      ":",
-    );
-  return cacheId;
-}
-
-export function createThumbhash(
-  image: Uint8Array,
-  url: string,
-): Promise<string> {
-  return new Promise((res, rej) => {
-    try {
-      ImageMagick.read(image.slice(), (_image) => {
-        _image.resize(new MagickGeometry(100, 100));
-        _image.getPixels((pixels) => {
-          const bytes = pixels.toByteArray(
-            0,
-            0,
-            _image.width,
-            _image.height,
-            "RGBA",
-          );
-          if (!bytes) return;
-          const [hash, average] = generateThumbhash(
-            bytes,
-            _image.width,
-            _image.height,
-          );
-          if (average) {
-            cache.set(
-              getCacheKey({
-                url,
-                suffix: "average",
-              }),
-              rgbToHex(average.r, average.g, average.b),
-            );
-          }
-          if (hash) {
-            const b64 = btoa(String.fromCharCode(...hash));
-            cache.set(
-              getCacheKey({
-                url,
-                suffix: "thumbnail",
-              }),
-              b64,
-            );
-            res(b64);
-          }
-        });
-      });
-    } catch (err) {
-      rej(err);
-    }
-  });
-}
-
-function verifyImage(
-  imageBuffer: Uint8Array,
-) {
-  return new Promise<boolean>((resolve) => {
-    try {
-      ImageMagick.read(imageBuffer, (image) => {
-        resolve(image.height !== 0 && image.width !== 0);
-      });
-    } catch (_err) {
-      resolve(false);
-    }
-  });
-}
-
-export function getThumbhash({ url }: { url: string }) {
-  return Promise.all(
-    [
-      cache.get<Uint8Array>(
-        getCacheKey({
-          url,
-          suffix: "thumbnail",
-        }),
-      ),
-      cache.get<string>(
-        getCacheKey({
-          url,
-          suffix: "average",
-        }),
-      ),
-    ] as const,
-  );
-}
-
-export async function getImage(opts: ImageCacheOptions) {
-  const cacheKey = getCacheKey(opts);
-  const pointerCacheRaw = await cache.get<string>(cacheKey);
-  if (!pointerCacheRaw) return;
-  const pointerCache = typeof pointerCacheRaw === "string"
-    ? JSON.parse(pointerCacheRaw)
-    : pointerCacheRaw;
-  const imageContent = await cache.get(`image:${pointerCache.id}`, true);
-  if (!imageContent) return;
-  return {
-    ...pointerCache,
-    buffer: imageContent,
-  };
-}
-
-export async function setImage(
-  buffer: Uint8Array,
-  opts: ImageCacheOptions,
-) {
-  const clone = new Uint8Array(buffer);
-  const imageCorrect = await verifyImage(clone);
-  if (!imageCorrect) {
-    log.info("failed to store image", { url: opts.url });
-    return;
-  }
-  const cacheKey = getCacheKey(opts);
-  const pointerId = await hash(cacheKey);
-  await cache.set(`image:${pointerId}`, clone, { expires: 60 * 60 * 24 });
-  await cache.set(
-    cacheKey,
-    JSON.stringify({
-      id: pointerId,
-      ...("suffix" in opts
-        ? { suffix: opts.suffix }
-        : { width: opts.width, height: opts.height }),
-    }),
-    { expires: 60 * 60 * 24 * 7 /* 1 week */ },
-  );
-}
+const imageDir = path.join(DATA_DIR, "images");
+await ensureDir(imageDir);
+
+async function getRemoteImage(imageUrl: string) {
+  try {
+    const sourceRes = await fetch(imageUrl);
+    if (!sourceRes.ok) {
+      throw new Error(
+        `Failed to retrieve image from URL: ${imageUrl}. Status: ${sourceRes.status}`,
+      );
+    }
+    const contentType = sourceRes.headers.get("Content-Type");
+    if (!contentType) {
+      throw new Error("No Content-Type header in response");
+    }
+    const mediaType = parseMediaType(contentType)[0];
+    if (mediaType.split("/")[0] !== "image") {
+      throw new Error("URL does not return an image type");
+    }
+    log.debug("Fetching image", { imageUrl, mediaType });
+    const buffer = await sourceRes.arrayBuffer();
+    if (buffer.byteLength === 0) {
+      throw new Error("Received empty image buffer");
+    }
+    return {
+      buffer,
+      mediaType,
+    };
+  } catch (error) {
+    throw error;
+  }
+}
+
+/**
+ * Calculates dimensions maintaining aspect ratio
+ */
+function calculateDimensions(
+  current: { width: number; height: number },
+  target: { width?: number; height?: number },
+) {
+  if (!target.width && !target.height) {
+    return current;
+  }
+  const ratio = current.width / current.height;
+  if (target.width && target.height) {
+    // Both dimensions specified - maintain aspect ratio using the most constraining dimension
+    const widthRatio = target.width / current.width;
+    const heightRatio = target.height / current.height;
+    const scale = Math.min(widthRatio, heightRatio);
+    return {
+      width: Math.round(current.width * scale),
+      height: Math.round(current.height * scale),
+    };
+  } else if (target.width) {
+    // Only width specified
+    return {
+      width: target.width,
+      height: Math.round(target.width / ratio),
+    };
+  } else if (target.height) {
+    // Only height specified
+    return {
+      width: Math.round(target.height * ratio),
+      height: target.height,
+    };
+  }
+  return current;
+}
+
+async function getLocalImagePath(
+  url: string,
+  { width, height }: { width?: number; height?: number } = {},
+) {
+  const { hostname, pathname } = new URL(url);
+  let imagePath = path.join(
+    imageDir,
+    hostname,
+    pathname.split("/").filter((s) => s.length).join("-"),
+  );
+  await ensureDir(imagePath);
+  if (width || height) {
+    imagePath = path.join(imagePath, `${width ?? "-"}x${height ?? "-"}`);
+  } else {
+    imagePath = path.join(imagePath, "original");
+  }
+  return imagePath;
+}
+
+/**
+ * Retrieves a cached image from local storage
+ */
+async function getLocalImage(
+  url: string,
+  { width, height }: { width?: number; height?: number } = {},
+) {
+  const imagePath = await getLocalImagePath(url, { width, height });
+  try {
+    const fileInfo = await Deno.stat(imagePath);
+    if (fileInfo?.isFile) {
+      return Deno.readFile(imagePath);
+    }
+  } catch (_) {
+    // File not found - normal case
+  }
+}
+
+/**
+ * Stores an image in local cache
+ */
+async function storeLocalImage(
+  url: string,
+  content: ArrayBuffer,
+  { width, height }: { width?: number; height?: number } = {},
+) {
+  const isValid = await verifyImage(new Uint8Array(content));
+  if (!isValid) {
+    throw new Error("Invalid image content detected during storage");
+  }
+  const imagePath = await getLocalImagePath(url, { width, height });
+  await Deno.writeFile(imagePath, new Uint8Array(content));
+}
+
+/**
+ * Resizes an image using Sharp with proper error handling
+ */
+async function resizeImage(
+  imageBuffer: Uint8Array,
+  params: {
+    width?: number;
+    height?: number;
+    mediaType: string;
+  },
+) {
+  try {
+    log.debug("Resizing image", { params });
+    let sharpInstance = sharp(imageBuffer);
+
+    // Get original dimensions
+    const metadata = await sharpInstance.metadata();
+    if (!metadata.width || !metadata.height) {
+      throw new Error("Could not determine image dimensions");
+    }
+
+    // Calculate new dimensions
+    const newDimensions = calculateDimensions(
+      { width: metadata.width, height: metadata.height },
+      params,
+    );
+
+    switch (params.mediaType) {
+      case "image/webp":
+        sharpInstance = sharpInstance.webp({ quality: 85 });
+        break;
+      case "image/png":
+        sharpInstance = sharpInstance.png({ quality: 85 });
+        break;
+      case "image/jpeg":
+      default:
+        sharpInstance = sharpInstance.jpeg({ quality: 85 });
+        break;
+    }
+
+    // Perform resize with proper options
+    const resized = await sharpInstance
+      .resize({
+        width: newDimensions.width,
+        height: newDimensions.height,
+        fit: "inside",
+        withoutEnlargement: true,
+      })
+      .toBuffer();
+
+    return new Uint8Array(resized);
+  } catch (error) {
+    log.error("Error during image resize:", error);
+    throw error;
+  }
+}
+
+/**
+ * Creates a thumbhash for image preview
+ */
+async function createThumbhash(
+  image: Uint8Array,
+): Promise<{ hash: string; average: string }> {
+  try {
+    const resizedImage = await sharp(image)
+      .resize(100, 100, { fit: "cover" }) // Keep aspect ratio within bounds
+      .toFormat("png")
+      .ensureAlpha()
+      .raw()
+      .toBuffer();
+    const [hash, average] = generateThumbhash(resizedImage, 100, 100);
+    return {
+      hash: btoa(String.fromCharCode(...hash)),
+      average: rgbToHex(average.r, average.g, average.b),
+    };
+  } catch (err) {
+    throw new Error(`Failed to create thumbhash: ${err}`);
+  }
+}
+
+/**
+ * Verifies that an image buffer contains valid image data
+ */
+async function verifyImage(imageBuffer: Uint8Array): Promise<boolean> {
+  try {
+    const metadata = await sharp(imageBuffer).metadata();
+    return !!(metadata.width && metadata.height && metadata.format);
+  } catch (error) {
+    log.error("Image verification failed:", error);
+    return false;
+  }
+}
+
+/**
+ * Gets image content with proper caching and resizing
+ */
+export async function getImageContent(
+  url: string,
+  { width, height }: { width?: number; height?: number } = {},
+): Promise<{ content: ArrayBuffer; mimeType: string }> {
+  log.debug("Getting image content", { url, width, height });
+
+  // Check if we have the image metadata in database
+  const image = await getImage(url);
+
+  // Try to get cached resized version
+  const cachedImage = await getLocalImage(url, { width, height });
+  if (cachedImage) {
+    return { content: cachedImage, mimeType: image.mime };
+  }
+
+  // Try to get cached original version
+  let originalImage = await getLocalImage(url);
+
+  // Fetch and cache original if needed
+  if (!originalImage) {
+    const fetchedImage = await getRemoteImage(url);
+    await storeLocalImage(url, fetchedImage.buffer);
+    originalImage = new Uint8Array(fetchedImage.buffer);
+  }
+
+  // Resize image
+  const resizedImage = await resizeImage(originalImage, {
+    width,
+    height,
+    mediaType: image.mime,
+  });
+
+  // Cache resized version
+  await storeLocalImage(url, resizedImage, { width, height });
+
+  return { content: resizedImage, mimeType: image.mime };
+}
+
+/**
+ * Gets or creates image metadata in database
+ */
+export async function getImage(url: string) {
+  log.debug("Getting image metadata", { url });
+  try {
+    // Check database first
+    const image = await db.select().from(imageTable)
+      .where(eq(imageTable.url, url))
+      .limit(1)
+      .then((images) => images[0]);
+    if (image) {
+      return image;
+    }
+
+    // Fetch and process new image
+    const imageContent = await getRemoteImage(url);
+    await storeLocalImage(url, imageContent.buffer);
+
+    // Generate thumbhash
+    const thumbhash = await createThumbhash(
+      new Uint8Array(imageContent.buffer),
+    );
+
+    // Store in database
+    const [newImage] = await db.insert(imageTable).values({
+      url: url,
+      blurhash: thumbhash.hash,
+      average: thumbhash.average,
+      mime: imageContent.mediaType,
+    }).returning();
+
+    return newImage;
+  } catch (error) {
+    log.error("Error in getImage:", error);
+    throw error;
+  }
+}
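Worth noting for reviewers: the resize path always preserves the source aspect ratio and never enlarges (fit: "inside", withoutEnlargement). A small worked example of the dimension math, reproduced standalone since calculateDimensions is module-private; the 4000×3000 source is an assumption for illustration:

// Mirrors calculateDimensions for a 4000x3000 source constrained to 800x800:
// scale = min(800 / 4000, 800 / 3000) = 0.2, so the result is 800x600.
const current = { width: 4000, height: 3000 };
const target = { width: 800, height: 800 };
const scale = Math.min(
  target.width / current.width,
  target.height / current.height,
);
console.log({
  width: Math.round(current.width * scale), // 800
  height: Math.round(current.height * scale), // 600
});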


@@ -5,25 +5,39 @@ import {
   transformDocument,
 } from "@lib/documents.ts";
 import { Root } from "https://esm.sh/remark-frontmatter@4.0.1";
-import { getThumbhash } from "@lib/cache/image.ts";
 import { GenericResource } from "@lib/types.ts";
 import { parseRating } from "@lib/helpers.ts";
+import { isLocalImage } from "@lib/string.ts";
+import { SILVERBULLET_SERVER } from "@lib/env.ts";
+import { imageTable } from "@lib/sqlite/schema.ts";
+import { db } from "@lib/sqlite/sqlite.ts";
+import { eq } from "drizzle-orm/sql";
 
-export async function addThumbnailToResource<T = GenericResource>(
+export async function addThumbnailToResource<T extends GenericResource>(
   res: T,
 ): Promise<T> {
-  const imageUrl = res?.meta?.image;
-  if (!imageUrl) return res;
-  const [thumbhash, average] = await getThumbhash({ url: imageUrl });
-  if (!thumbhash) return res;
-  return {
-    ...res,
-    meta: {
-      ...res?.meta,
-      average: average,
-      thumbnail: thumbhash,
-    },
-  };
+  if (!res?.meta?.image) return res;
+  const imageUrl = isLocalImage(res.meta.image)
+    ? `${SILVERBULLET_SERVER}/${res.meta.image}`
+    : res.meta.image;
+  const image = await db.select().from(imageTable)
+    .where(eq(imageTable.url, imageUrl))
+    .limit(1)
+    .then((images) => images[0]);
+  if (image) {
+    return {
+      ...res,
+      meta: {
+        ...res.meta,
+        average: image.average,
+        thumbnail: image.blurhash,
+      },
+    };
+  }
+  return res;
 }
 
 type SortType = "rating" | "date" | "name" | "author";
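addThumbnailToResource now reads the precomputed hash and average color straight from the image table instead of the KV cache, so callers stay unchanged apart from the stricter generic bound. A sketch of batch usage; the import path for addThumbnailToResource is assumed, since this hunk does not show the file name:

import { GenericResource } from "@lib/types.ts";
import { addThumbnailToResource } from "@lib/resources.ts"; // hypothetical path

// Resources whose image has no row in the image table come back unchanged.
export function withThumbnails(resources: GenericResource[]) {
  return Promise.all(resources.map((res) => addThumbnailToResource(res)));
}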


@@ -31,4 +31,4 @@ export const DATA_DIR = Deno.env.has("DATA_DIR")
   : path.resolve(Deno.cwd(), "data");
 export const LOG_LEVEL: string = Deno.env.get("LOG_LEVEL") ||
-  "warn";
+  "debug";


@@ -23,9 +23,10 @@ const logFuncs = {
 } as const;
 let longestScope = 0;
-let logLevel = _LOG_LEVEL && _LOG_LEVEL in logMap && _LOG_LEVEL in logMap
+let logLevel = (_LOG_LEVEL && _LOG_LEVEL in logMap)
   ? logMap[_LOG_LEVEL]
-  : LOG_LEVEL.WARN;
+  : LOG_LEVEL.DEBUG;
 const ee = new EventEmitter<{
   log: { level: LOG_LEVEL; scope: string; args: unknown[] };
 }>();
@@ -41,7 +42,7 @@ type LoggerOptions = {
 const createLogFunction =
   (scope: string, level: LOG_LEVEL) => (...data: unknown[]) => {
     ee.emit("log", { level, scope, args: data });
-    if (level <= logLevel) return;
+    if (level < logLevel) return;
     logFuncs[level](`[${scope.padEnd(longestScope, " ")}]`, ...data);
   };
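Two behavior changes land here: the fallback log level drops from WARN to DEBUG, and the early-return comparison becomes strict, so messages exactly at the threshold are no longer swallowed. A minimal sketch of the threshold check, assuming the conventional numeric ordering DEBUG < INFO < WARN < ERROR (the real LOG_LEVEL enum is defined elsewhere in this file):

// Assumed values for illustration only.
enum LOG_LEVEL {
  DEBUG = 0,
  INFO = 1,
  WARN = 2,
  ERROR = 3,
}

function shouldLog(level: LOG_LEVEL, threshold: LOG_LEVEL) {
  // Old check: `level <= threshold` returned early even when level === threshold,
  // so a WARN message was dropped at a WARN threshold. The strict `<` keeps it.
  return !(level < threshold);
}

console.log(shouldLog(LOG_LEVEL.WARN, LOG_LEVEL.WARN)); // true (was false before)
console.log(shouldLog(LOG_LEVEL.DEBUG, LOG_LEVEL.WARN)); // false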


@@ -22,13 +22,11 @@ export const savePerformance = async (url: string, seconds: number) => {
   if (u.pathname.includes("_frsh/")) return;
   u.searchParams.delete("__frsh_c");
   console.log("Saving performance", u.pathname, u.search, seconds);
-  const res = await db.insert(performanceTable).values({
+  await db.insert(performanceTable).values({
     path: decodeURIComponent(u.pathname),
     search: u.search,
     time: Math.floor(seconds * 1000),
   });
-  console.log({ res });
 };
 export async function getPerformances(): Promise<PerformanceRes> {


@@ -87,7 +87,7 @@ export class ConcurrentPromiseQueue {
    */
   private queues: PromiseQueue[] = [];
-  constructor(concurrency: number) {
+  constructor(concurrency: number = 1) {
     this.queues = Array.from({ length: concurrency }).map(() => {
       return new PromiseQueue();
     });


@@ -38,7 +38,7 @@ export const imageTable = sqliteTable("image", {
   createdAt: integer("created_at", { mode: "timestamp" }).default(
     sql`(current_timestamp)`,
   ),
-  path: text().notNull(),
+  url: text().notNull(),
   average: text().notNull(),
   blurhash: text().notNull(),
   mime: text().notNull(),
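For context, the renamed column means lookups are keyed by the full source URL rather than a storage path. The table now looks roughly like this; the id column and the import lines are assumptions, since the hunk only shows the changed column and its neighbours:

import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
import { sql } from "drizzle-orm";

export const imageTable = sqliteTable("image", {
  id: integer().primaryKey({ autoIncrement: true }), // assumed
  createdAt: integer("created_at", { mode: "timestamp" }).default(
    sql`(current_timestamp)`,
  ),
  url: text().notNull(), // previously `path`
  average: text().notNull(), // average color as a hex string
  blurhash: text().notNull(), // base64-encoded thumbhash
  mime: text().notNull(), // e.g. "image/jpeg"
});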


@@ -7,7 +7,7 @@ const DB_FILE = "file:data-dev/db.sqlite";
 // You can specify any property from the libsql connection options
 export const db = drizzle({
-  logger: true,
+  // logger: true,
   connection: {
     url: DB_FILE,
   },
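Rather than toggling the commented-out logger by hand, the flag could be driven by an environment variable; a small sketch assuming the libsql driver import used by this file and a hypothetical SQL_LOG variable:

import { drizzle } from "drizzle-orm/libsql"; // assumed driver import

const DB_FILE = "file:data-dev/db.sqlite";

// Log SQL statements only when explicitly requested.
export const db = drizzle({
  logger: Deno.env.get("SQL_LOG") === "1",
  connection: {
    url: DB_FILE,
  },
});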


@@ -99,13 +99,14 @@ export async function createTypesenseDocument(doc: TypesenseDocument) {
   const client = await getTypeSenseClient();
   if (!client) return;
-  await client.collections("resources").documents().create(
-    doc,
-    { action: "upsert" },
-  );
+  // await client.collections("resources").documents().create(
+  //   doc,
+  //   { action: "upsert" },
+  // );
 }
 async function synchronizeWithTypesense() {
+  return;
   await init;
   try {
     const allResources = (await Promise.all([
@@ -135,6 +136,8 @@ async function synchronizeWithTypesense() {
       };
     });
+    return;
     await client.collections("resources").documents().import(
       typesenseDocuments,
       { action: "upsert" },