fix: most of the template blocks

This commit is contained in:
Max Richter
2025-09-30 19:28:56 +02:00
parent d35f3e5e2e
commit 2a1572f99d
20 changed files with 210 additions and 187 deletions

View File

@@ -20,21 +20,27 @@ func ParseBlock(input string, block template.Block) (any, error) {
case template.CodecHashtags:
return Keywords(input, block)
}
return nil, fmt.Errorf("unknown codec: %s", block.Codec)
fmt.Printf("%#v\n", block)
return nil, fmt.Errorf("unknown codec '%s'", block.Codec)
}
func Parse(matches []matcher.Block) (any, error) {
var result any
for _, m := range matches {
for i, m := range matches {
if m.Block.Path == "@index" {
continue
}
input := m.GetContent()
value, err := ParseBlock(input, m.Block)
var blockIdentifier any
blockIdentifier = m.Block.Path
if blockIdentifier == "" {
blockIdentifier = fmt.Sprintf("#%d", i)
}
if err != nil {
return nil, fmt.Errorf("failed to parse block(%s): %w", m.Block.Path, err)
return nil, fmt.Errorf("failed to parse block(%s) -> %w", blockIdentifier, err)
}
result = utils.SetPathValue(m.Block.Path, value, result)
}

View File

@@ -50,7 +50,6 @@ func MatchBlocksFuzzy(markdown string, templateBlocks []template.Block, maxDist
}
}
// Handle the last block
if len(templateBlocks) > 0 {
lastBlock := templateBlocks[len(templateBlocks)-1]
if lastBlock.Type == template.DataBlock {

View File

@@ -10,7 +10,7 @@ import (
"git.max-richter.dev/max/marka/testdata"
)
func TestFuzzyFindAll(t *testing.T) {
func TestMatch_FuzzyFindAll(t *testing.T) {
recipeMd := testdata.Read(t, "baguette/input.md")
tests := []struct {
@@ -36,13 +36,14 @@ func TestFuzzyFindAll(t *testing.T) {
}
}
func TestFuzzyBlockMatch(t *testing.T) {
func TestMatch_FuzzyBlockBaguette(t *testing.T) {
recipeMd := testdata.Read(t, "baguette/input.md")
schemaMd, err := registry.GetTemplate("Recipe")
if err != nil {
t.Errorf("Failed to load template: %s", err.Error())
t.FailNow()
}
blocks, err := template.CompileTemplate(schemaMd)
if err != nil {
t.Errorf("Failed to compile template: %s", err.Error())
@@ -51,9 +52,8 @@ func TestFuzzyBlockMatch(t *testing.T) {
matches := matcher.MatchBlocksFuzzy(string(recipeMd), blocks, 0.3)
for _, b := range blocks {
fmt.Printf("Block: %+v\n", b)
fmt.Printf("Content: '%q'\n\n", b.GetContent())
for _, m := range matches {
fmt.Printf("Content: '%s'->'%q'\n\n", m.Block.Path, m.GetContent())
}
expected := []struct {
@@ -66,10 +66,7 @@ func TestFuzzyBlockMatch(t *testing.T) {
value: "Baguette",
},
{
value: "\nMy favourite baguette recipe",
},
{
value: "",
value: "My favourite baguette recipe",
},
{
value: "- Flour\n- Water\n- Salt",
@@ -87,11 +84,10 @@ func TestFuzzyBlockMatch(t *testing.T) {
if expected[i].value != m.GetContent() {
t.Errorf("Match %d did not match expected: %q", i, expected[i].value)
}
fmt.Printf("match: %s->%q\n", m.Block.Path, m.GetContent())
}
}
func TestFuzzyBlockMatchSalad(t *testing.T) {
func TestMatch_FuzzyBlockSalad(t *testing.T) {
recipeMd := testdata.Read(t, "recipe_salad/input.md")
schemaMd, err := registry.GetTemplate("Recipe")
if err != nil {
@@ -115,9 +111,6 @@ func TestFuzzyBlockMatchSalad(t *testing.T) {
{
value: "Simple Salad",
},
{
value: "#healthy #salad",
},
{
value: "A quick green salad.",
},

View File

@@ -5,7 +5,6 @@ package parser
import (
"fmt"
"strings"
"time"
"git.max-richter.dev/max/marka/parser/decoders"
"git.max-richter.dev/max/marka/parser/matcher"
@@ -53,79 +52,33 @@ func MatchBlocks(markdownContent, templateContent string) ([]matcher.Block, erro
}
func ParseFile(markdownContent string) (any, error) {
timings := make(map[string]int64)
startDetectType := time.Now()
markdownContent = strings.TrimSuffix(markdownContent, "\n")
contentType, err := DetectType(markdownContent)
if err != nil {
return nil, fmt.Errorf("could not detect type -> %w", err)
}
timings["detect_type"] = time.Since(startDetectType).Milliseconds()
startGetTemplate := time.Now()
templateContent, err := registry.GetTemplate(contentType)
if err != nil {
return nil, fmt.Errorf("could not get schema -> %w", err)
}
timings["get_template"] = time.Since(startGetTemplate).Milliseconds()
startTemplate := time.Now()
tpl, err := template.CompileTemplate(templateContent)
if err != nil {
return nil, fmt.Errorf("failed to compile template -> %w", err)
}
timings["template_compilation"] = time.Since(startTemplate).Milliseconds()
startMarkdown := time.Now()
blocks := matcher.MatchBlocksFuzzy(markdownContent, tpl, 0.3)
fmt.Println("Blocks: ", len(blocks))
for i, b := range blocks {
fmt.Printf("Block %d %+v\n", i, b)
fmt.Printf("Content %d: %q\n\n", i, b.GetContent())
return nil, fmt.Errorf("could not get template -> %w", err)
}
result, err := decoders.Parse(blocks)
if err != nil {
return nil, fmt.Errorf("failed to parse blocks -> %w", err)
}
timings["markdown_parsing"] = time.Since(startMarkdown).Milliseconds()
response := map[string]any{
"data": result,
"timings": timings,
}
return response, nil
return ParseFileWithTemplate(markdownContent, templateContent)
}
func ParseFileWithTemplate(markdownContent string, templateContent string) (any, error) {
timings := make(map[string]int64)
startTemplate := time.Now()
markdownContent = strings.TrimSuffix(markdownContent, "\n")
tpl, err := template.CompileTemplate(templateContent)
if err != nil {
return nil, fmt.Errorf("failed to compile template -> %w", err)
}
timings["template_compilation"] = time.Since(startTemplate).Milliseconds()
startMarkdown := time.Now()
blocks := matcher.MatchBlocksFuzzy(markdownContent, tpl, 0.3)
result, err := decoders.Parse(blocks)
if err != nil {
return nil, fmt.Errorf("failed to parse blocks -> %w", err)
}
timings["markdown_parsing"] = time.Since(startMarkdown).Milliseconds()
response := map[string]any{
"data": result,
"timings": timings,
return nil, fmt.Errorf("failed to compile blocks -> %w", err)
}
return response, nil
return result, nil
}

View File

@@ -9,7 +9,30 @@ import (
"github.com/google/go-cmp/cmp"
)
func TestParseRecipe_Golden(t *testing.T) {
// TestParse_DetectType verifies that parser.DetectType reports "Recipe" for
// the salad fixture and "Article" for the simple article fixture.
func TestParse_DetectType(t *testing.T) {
	recipeSrc := testdata.Read(t, "recipe_salad/input.md")
	articleSrc := testdata.Read(t, "article_simple/input.md")

	// Detection errors abort the test immediately; the comparisons below
	// would be meaningless without both results.
	gotRecipe, recipeErr := parser.DetectType(string(recipeSrc))
	if recipeErr != nil {
		t.Fatalf("failed to detect recipeType: %v", recipeErr)
	}
	gotArticle, articleErr := parser.DetectType(string(articleSrc))
	if articleErr != nil {
		t.Fatalf("failed to detect articleType: %v", articleErr)
	}

	// Mismatches are reported with Errorf so both checks always run.
	if want := "Recipe"; gotRecipe != want {
		t.Errorf("recipeType did not match expected type 'Recipe' -> %s", gotRecipe)
	}
	if want := "Article"; gotArticle != want {
		t.Errorf("articleType did not match expected type 'Article' -> %s", gotArticle)
	}
}
func TestParse_RecipeSalad(t *testing.T) {
inputContent := testdata.Read(t, "recipe_salad/input.md")
output := testdata.Read(t, "recipe_salad/output.json")
@@ -18,19 +41,17 @@ func TestParseRecipe_Golden(t *testing.T) {
t.Fatalf("ParseFile: %v", err)
}
gotMap := got.(map[string]any)
var want map[string]any
if err := json.Unmarshal(output, &want); err != nil {
t.Fatalf("unmarshal expected.json: %v", err)
}
if diff := cmp.Diff(want, gotMap["data"]); diff != "" {
if diff := cmp.Diff(want, got); diff != "" {
t.Fatalf("JSON mismatch (-want +got):\n%s", diff)
}
}
func TestParseRecipe_NoDescription(t *testing.T) {
func TestParse_RecipeNoDescription(t *testing.T) {
inputContent := testdata.Read(t, "recipe_no_description/input.md")
got, err := parser.ParseFile(string(inputContent))
@@ -49,7 +70,7 @@ func TestParseRecipe_NoDescription(t *testing.T) {
}
}
func TestParseRecipe_Baguette(t *testing.T) {
func TestParse_Baguette(t *testing.T) {
inputContent := testdata.Read(t, "baguette/input.md")
got, err := parser.ParseFile(string(inputContent))
@@ -68,7 +89,7 @@ func TestParseRecipe_Baguette(t *testing.T) {
}
}
func TestParseArticle_Simple(t *testing.T) {
func TestParse_Article(t *testing.T) {
inputContent := testdata.Read(t, "article_simple/input.md")
got, err := parser.ParseFile(string(inputContent))

View File

@@ -8,6 +8,23 @@ declare global {
// interface PageState {}
// interface Platform {}
}
/**
 * Global constructor used by the wasm loader (`new globalThis.Go()`).
 * Presumably installed by the Go/TinyGo `wasm_exec.js` glue script — confirm.
 *
 * Declared with `var` and a construct signature on purpose:
 * - inside a class body, `new(): …` would declare a *method* called `new`,
 *   not a constructor returning this shape;
 * - only `var`/`function` globals are visible as properties of `globalThis`
 *   in TypeScript; `class`, `let` and `const` bindings are not.
 */
var Go: {
	new (): {
		/** Runs the instantiated module. */
		run: (inst: WebAssembly.Instance) => Promise<void>;
		/** Import object to pass to `WebAssembly.instantiate*`. */
		importObject: WebAssembly.Imports;
	};
};
/**
 * Bridge object the Go wasm module assigns to the global scope once it runs.
 *
 * Every function returns a string. On failure the Go side returns a JSON
 * object of the form `{"error": "<message>"}` instead of the normal payload.
 *
 * Declared with `var` (not `const`) because only `var` globals are visible as
 * properties of `globalThis`, which is how callers access this object.
 */
var marka: {
	/** Matches markdown `s` against template source `t`; returns JSON. */
	matchBlocks(s: string, t: string): string;
	/** Returns the detected content type as a plain (non-JSON) string. */
	detectType(markdown: string): string;
	/** Parses markdown using the template of its detected type; returns JSON. */
	parseFile(input: string): string;
	/** Parses markdown using the given template source; returns JSON. */
	parseFileWithTemplate(markdown: string, template: string): string;
	/** Returns the registry's templates as JSON. */
	listTemplates(): string;
	/** Returns the source of the named template as a plain string. */
	getTemplate(name: string): string;
	/** Compiles template source and returns the resulting blocks as JSON. */
	compileTemplate(source: string): string;
};
}
export {};
export {};

View File

@@ -2,6 +2,7 @@
import { json } from '@codemirror/lang-json';
import { markdown } from '@codemirror/lang-markdown';
import {
compileTemplate,
getTemplate,
listTemplates,
parseMarkdown,
@@ -86,9 +87,12 @@ My favourite baguette recipe
return;
}
try {
compileTemplate(templateValue);
const result = templateValue
? parseMarkdownWithTemplate(markdownValue, templateValue)
: parseMarkdown(markdownValue);
console.log({ result });
if ('error' in result) {
jsonOutput = '';

View File

@@ -1,20 +1,6 @@
import { readable } from "svelte/store";
declare global {
interface Window {
Go: {
new(): {
run: (inst: WebAssembly.Instance) => Promise<void>;
importObject: WebAssembly.Imports;
};
};
markaMatchBlocks: (input: string) => unknown;
markaParseFile: (input: string) => string;
markaParseFileWithTemplate: (markdown: string, template: string) => string;
markaListTemplates: () => string;
markaGetTemplate: (name: string) => string;
}
}
export const wasmReady = readable(false, (set) => {
if (typeof window === "undefined") {
@@ -22,7 +8,7 @@ export const wasmReady = readable(false, (set) => {
}
const loadWasm = async () => {
const go = new window.Go();
const go = new globalThis.Go();
try {
const result = await WebAssembly.instantiateStreaming(
fetch("/main.wasm"),
@@ -38,7 +24,7 @@ export const wasmReady = readable(false, (set) => {
if (document.readyState === "complete") {
loadWasm();
} else {
window.addEventListener("load", loadWasm);
globalThis.addEventListener("load", loadWasm);
}
});
@@ -54,46 +40,75 @@ export type ParseResultError = {
export type ParseResult = ParseResultSuccess | ParseResultError;
export function parseMarkdown(markdown: string): ParseResult {
if (typeof window.markaParseFile !== "function") {
if (typeof globalThis.marka?.parseFile !== "function") {
throw new Error("Wasm module not ready");
}
const result = window.markaParseFile(markdown);
if (result.error) return result;
return JSON.parse(result);
const resultString = globalThis.marka.parseFile(markdown);
return JSON.parse(resultString);
}
export function matchBlocks(markdown: string): ParseResult {
if (typeof window.markaMatchBlocks !== "function") {
export function compileTemplate(templateSource: string) {
if (typeof globalThis.marka?.compileTemplate !== "function") {
throw new Error("Wasm module not ready");
}
const result = window.markaMatchBlocks(markdown) as ParseResult;
if (result.error) return result;
return JSON.parse(result);
const resultString = globalThis.marka.compileTemplate(templateSource);
const result = JSON.parse(resultString);
console.log({ result });
return result;
}
/**
 * Matches `markdown` against `template` through the wasm bridge and returns
 * the decoded JSON response.
 *
 * @throws Error("Wasm module not ready") when the bridge function is missing.
 * @throws SyntaxError when the bridge returns a string that is not valid JSON.
 */
export function matchBlocks(markdown: string, template: string): ParseResult {
	const bridge = globalThis.marka;
	if (typeof bridge?.matchBlocks !== "function") {
		throw new Error("Wasm module not ready");
	}

	return JSON.parse(bridge.matchBlocks(markdown, template));
}
export function parseMarkdownWithTemplate(
markdown: string,
template: string,
): ParseResult {
if (typeof window.markaParseFileWithTemplate !== "function") {
if (typeof globalThis.marka?.parseFileWithTemplate !== "function") {
throw new Error("Wasm module not ready");
}
const result = window.markaParseFileWithTemplate(markdown, template);
if (result.error) return result;
return JSON.parse(result);
const resultString = globalThis.marka.parseFileWithTemplate(
markdown,
template,
);
return JSON.parse(resultString);
}
export function listTemplates(): string[] {
if (typeof window.markaListTemplates !== "function") {
if (typeof globalThis.marka?.listTemplates !== "function") {
throw new Error("Wasm module not ready");
}
const result = window.markaListTemplates();
return JSON.parse(result);
const resultString = globalThis.marka.listTemplates();
return JSON.parse(resultString);
}
export function getTemplate(name: string): string {
if (typeof window.markaGetTemplate !== "function") {
if (typeof globalThis.marka?.getTemplate !== "function") {
throw new Error("Wasm module not ready");
}
return window.markaGetTemplate(name);
return globalThis.marka.getTemplate(name);
}
/**
 * Asks the wasm bridge for the content type of `markdown`.
 *
 * The bridge answers with a plain type name on success and with a JSON
 * object carrying an `error` field on failure. The decoded object is returned
 * only in the failure case; every other response is passed through as the
 * raw string.
 *
 * @throws Error("Wasm module not ready") when the bridge function is missing.
 */
export function detectType(markdown: string): string | ParseResultError {
	const bridge = globalThis.marka;
	if (typeof bridge?.detectType !== "function") {
		throw new Error("Wasm module not ready");
	}

	const raw = bridge.detectType(markdown);

	let decoded;
	try {
		decoded = JSON.parse(raw);
	} catch {
		// Not JSON at all: this is the plain type name of a successful call.
		return raw;
	}

	// Valid JSON without a truthy `error` (including `null`) is still treated
	// as a successful plain-string response.
	return decoded?.error ? decoded : raw;
}

Binary file not shown.

View File

@@ -7,12 +7,13 @@ OUT_WASM="$OUT_DIR/main.wasm"
mkdir -p "$OUT_DIR"
tinygo build -target=wasm -opt=z -no-debug -panic=trap -gc=leaking \
tinygo build -target=wasm \
-opt=z -no-debug -panic=print -gc=leaking \
-o "$OUT_WASM" "$SCRIPT_DIR"
# Optional post-process (run only if tools exist)
# command -v wasm-opt >/dev/null && wasm-opt -Oz --strip-debug --strip-dwarf --strip-producers \
# -o "$OUT_WASM.tmp" "$OUT_WASM" && mv "$OUT_WASM.tmp" "$OUT_WASM"
# Shrink the module with wasm-opt when it is installed. The `if` keeps a
# missing tool from turning into a non-zero exit status for the script, while
# a real wasm-opt failure is still reported and never replaces the output.
if command -v wasm-opt >/dev/null; then
  wasm-opt -Oz --strip-debug --strip-dwarf --strip-producers \
    -o "$OUT_WASM.tmp" "$OUT_WASM" && mv "$OUT_WASM.tmp" "$OUT_WASM"
fi
# command -v wasm-strip >/dev/null && wasm-strip "$OUT_WASM"
# command -v brotli >/dev/null && brotli -f -q 11 "$OUT_WASM" -o "$OUT_WASM.br"
# command -v gzip >/dev/null && gzip -c -9 "$OUT_WASM" > "$OUT_WASM.gz"

View File

@@ -8,92 +8,108 @@ import (
p "git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/registry"
"git.max-richter.dev/max/marka/template"
)
func matchBlocks(_ js.Value, args []js.Value) any {
if len(args) == 0 {
return js.ValueOf(map[string]any{"error": "missing markdown"})
}
t, err := p.MatchBlocks(args[0].String(), args[1].String())
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
}
jsonString, _ := json.Marshal(t)
return js.ValueOf(string(jsonString)) // plain string
// wrapError renders err as a JSON object of the form {"error": "<message>"}
// and returns it as a string.
func wrapError(err error) string {
	payload := struct {
		Error string `json:"error"`
	}{Error: err.Error()}

	// Encoding a struct with a single string field cannot fail, so the error
	// result is deliberately discarded.
	encoded, _ := json.Marshal(payload)
	return string(encoded)
}
func detectType(_ js.Value, args []js.Value) any {
if len(args) == 0 {
return js.ValueOf(map[string]any{"error": "missing markdown"})
}
t, err := p.DetectType(args[0].String())
func MatchBlocks(this js.Value, args []js.Value) any {
s := args[0].String()
t := args[1].String()
matched, err := p.MatchBlocks(s, t)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return js.ValueOf(t) // plain string
jsonString, _ := json.Marshal(matched)
return string(jsonString)
}
func parseFile(_ js.Value, args []js.Value) any {
if len(args) == 0 {
return js.ValueOf(map[string]any{"error": "missing markdown"})
}
res, err := p.ParseFile(args[0].String())
func DetectType(this js.Value, args []js.Value) any {
markdown := args[0].String()
t, err := p.DetectType(markdown)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return t
}
func ParseFile(this js.Value, args []js.Value) any {
markdown := args[0].String()
res, err := p.ParseFile(markdown)
if err != nil {
return wrapError(err)
}
b, err := json.Marshal(res)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return js.ValueOf(string(b))
return string(b)
}
func parseFileWithTemplate(_ js.Value, args []js.Value) any {
if len(args) < 2 {
return js.ValueOf(map[string]any{"error": "missing markdown or template"})
}
res, err := p.ParseFileWithTemplate(args[0].String(), args[1].String())
func ParseFileWithTemplate(this js.Value, args []js.Value) any {
markdown := args[0].String()
template := args[1].String()
res, err := p.ParseFileWithTemplate(markdown, template)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
b, err := json.Marshal(res)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return js.ValueOf(string(b))
return string(b)
}
func listTemplates(_ js.Value, args []js.Value) any {
func ListTemplates(this js.Value, args []js.Value) any {
templates, err := registry.ListTemplates()
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
b, err := json.Marshal(templates)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return js.ValueOf(string(b))
return string(b)
}
func getTemplate(_ js.Value, args []js.Value) any {
if len(args) == 0 {
return js.ValueOf(map[string]any{"error": "missing template name"})
}
template, err := registry.GetTemplate(args[0].String())
func GetTemplate(this js.Value, args []js.Value) any {
name := args[0].String()
template, err := registry.GetTemplate(name)
if err != nil {
return js.ValueOf(map[string]any{"error": err.Error()})
return wrapError(err)
}
return js.ValueOf(template)
return template
}
func CompileTemplate(this js.Value, args []js.Value) any {
source := args[0].String()
template, err := template.CompileTemplate(source)
if err != nil {
return wrapError(err)
}
b, err := json.Marshal(template)
if err != nil {
return wrapError(err)
}
return string(b)
}
func main() {
js.Global().Set("markaDetectType", js.FuncOf(detectType))
js.Global().Set("markaParseFile", js.FuncOf(parseFile))
js.Global().Set("markaParseFileWithTemplate", js.FuncOf(parseFileWithTemplate))
js.Global().Set("markaMatchBlocks", js.FuncOf(matchBlocks))
js.Global().Set("markaListTemplates", js.FuncOf(listTemplates))
js.Global().Set("markaGetTemplate", js.FuncOf(getTemplate))
marka := js.Global().Get("Object").New()
marka.Set("matchBlocks", js.FuncOf(MatchBlocks))
marka.Set("detectType", js.FuncOf(DetectType))
marka.Set("parseFile", js.FuncOf(ParseFile))
marka.Set("parseFileWithTemplate", js.FuncOf(ParseFileWithTemplate))
marka.Set("listTemplates", js.FuncOf(ListTemplates))
marka.Set("getTemplate", js.FuncOf(GetTemplate))
marka.Set("compileTemplate", js.FuncOf(CompileTemplate))
js.Global().Set("marka", marka)
select {}
}
}

View File

@@ -22,16 +22,13 @@
pathAlias: rating
- path: reviewRating.bestRating
codec: const
value: 5
hidden: true
- path: reviewRating.worstRating
codec: const
value: 1
hidden: true
}
---
# { headline }
{ keywords | hashtags }
{ articleBody }

View File

@@ -10,7 +10,6 @@
- path: "_type"
codec: const
value: Recipe
hidden: true
- path: image
- path: author._type
codec: const
@@ -30,7 +29,6 @@
---
# { name | text }
{ keywords | hashtags,optional }
{ description | text }

View File

@@ -18,11 +18,11 @@ type yamlBlock struct {
}
type yamlField struct {
Path string `yaml:"path"`
Value any `yaml:"value,omitempty"`
Codec string `yaml:"codec"`
Hidden bool `yaml:"hidden,omitempty"`
PathAlias []string `yaml:"pathAlias,omitempty"`
Path string `yaml:"path"`
Value any `yaml:"value,omitempty"`
Codec string `yaml:"codec"`
Hidden bool `yaml:"hidden,omitempty"`
PathAlias string `yaml:"pathAlias,omitempty"`
}
func parseYamlTemplate(input Slice) (block Block, err error) {
@@ -34,7 +34,7 @@ func parseYamlTemplate(input Slice) (block Block, err error) {
dec.KnownFields(true)
if err := dec.Decode(&blk); err != nil {
return block, NewErrorf("content '%q' -> %w", cleaned, err).WithPosition(input.start, input.end)
return block, NewErrorf("failed to parse yaml -> %w", err).WithPosition(input.start, input.end)
}
if blk.Path == "" {

View File

@@ -1,9 +1,13 @@
package template
import "strings"
// CompileTemplate scans once, emitting:
// - data blocks: inner content between a line that's exactly "{" and a line that's exactly "}"
// - matching blocks: gaps between data blocks (excluding the brace lines themselves)
func CompileTemplate(templateSource string) ([]Block, error) {
templateSource = strings.TrimSuffix(templateSource, "\n")
var out []Block
var curlyIndex int

View File

@@ -1,6 +1,7 @@
package template_test
import (
"fmt"
"testing"
"git.max-richter.dev/max/marka/registry"
@@ -20,6 +21,10 @@ func TestExtractBlocks(t *testing.T) {
t.FailNow()
}
for i, b := range templateBlocks {
fmt.Printf("Block#%d: %q\n", i, b.GetContent())
}
expected := []template.Block{
{
Type: template.MatchingBlock,
@@ -58,14 +63,11 @@ func TestExtractBlocks(t *testing.T) {
{Type: template.MatchingBlock},
{Type: template.DataBlock, Path: "name", Codec: "text"},
{Type: template.MatchingBlock},
{Type: template.DataBlock, Path: "keywords", Codec: "hashtags", Optional: true},
{Type: template.MatchingBlock},
{Type: template.DataBlock, Path: "description", Codec: "text"},
{Type: template.MatchingBlock},
{Type: template.DataBlock, Path: "recipeIngredient", Codec: "list", ListTemplate: "- { . }"},
{Type: template.MatchingBlock},
{Type: template.DataBlock, Path: "recipeInstructions", Codec: "list", ListTemplate: "{ @index }. { . }"},
{Type: template.MatchingBlock},
}
if len(templateBlocks) != len(expected) {

View File

@@ -1,4 +1,5 @@
{
"_schema": "Article",
"_type": "Article",
"headline": "My First Article",
"author": {

View File

@@ -1,4 +1,5 @@
{
"_schema": "Recipe",
"_type": "Recipe",
"name": "Baguette",
"author": {

View File

@@ -8,7 +8,6 @@ recipeYield: 2 servings
---
# Simple Salad
#healthy #salad
A quick green salad.

View File

@@ -7,10 +7,6 @@
"_type": "Person",
"name": "Alex Chef"
},
"keywords": [
"healthy",
"salad"
],
"description": "A quick green salad.",
"prepTime": "PT10M",
"cookTime": "PT0M",