ffs, i should have committed wayyy earlier

This commit is contained in:
Max Richter
2025-08-16 20:38:40 +02:00
commit 43644c4f40
25 changed files with 865 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
/bin/
/dist/
/.marka/
/*.log

6
README.md Normal file
View File

@@ -0,0 +1,6 @@
# Marka
Bidirectional mapping between Markdown and JSON (Schema.org-style) via small, declarative templates.
Marka lets you parse Markdown → JSON and render JSON → Markdown using the same template.

62
cmd/marka/main.go Normal file
View File

@@ -0,0 +1,62 @@
package main
import (
"encoding/json"
"flag"
"log"
"os"
"git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/renderer"
)
// main dispatches the marka CLI: `marka parse` extracts JSON from Markdown,
// `marka render` produces Markdown from JSON. With no arguments it defaults
// to the parse subcommand.
func main() {
	sub := "parse"
	if len(os.Args) > 1 {
		sub = os.Args[1]
	}
	switch sub {
	case "parse":
		fs := flag.NewFlagSet("parse", flag.ExitOnError)
		tpl := fs.String("template", "", "template file (Markdown)")
		in := fs.String("in", "", "input markdown")
		schema := fs.String("schema", "", "json schema (optional)")
		out := fs.String("out", "-", "output json (- for stdout)")
		_ = fs.Parse(os.Args[2:]) // ExitOnError: Parse exits on failure, never returns an error
		// NOTE(review): parser.ParseFile in this tree is declared with a single
		// markdown-content argument; this three-argument call will not compile
		// against it — confirm the intended signature.
		data, err := parser.ParseFile(*tpl, *in, *schema)
		if err != nil {
			log.Fatal(err)
		}
		w := dest(*out)
		enc := json.NewEncoder(w)
		enc.SetIndent("", " ")
		// BUG FIX: the encode error was previously discarded; a failed or
		// partial write now aborts with a non-zero exit.
		if err := enc.Encode(data); err != nil {
			log.Fatal(err)
		}
		closeDest(w)
	case "render":
		fs := flag.NewFlagSet("render", flag.ExitOnError)
		tpl := fs.String("template", "", "template file (Markdown)")
		in := fs.String("in", "", "input json")
		out := fs.String("out", "-", "output markdown (- for stdout)")
		_ = fs.Parse(os.Args[2:]) // ExitOnError: Parse exits on failure
		md, err := renderer.RenderFile(*tpl, *in)
		if err != nil {
			log.Fatal(err)
		}
		w := dest(*out)
		// BUG FIX: the write error was previously discarded.
		if _, err := w.Write(md); err != nil {
			log.Fatal(err)
		}
		closeDest(w)
	default:
		log.Fatalf("unknown subcommand: %s (use parse|render)", sub)
	}
}

// closeDest closes f when it refers to a regular output file; stdout is left
// open. A close failure (e.g. full disk flushing buffered data) is fatal, so
// truncated output cannot look like success.
func closeDest(f *os.File) {
	if f == os.Stdout {
		return
	}
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}
func dest(path string) *os.File {
if path == "-" {
return os.Stdout
}
f, err := os.Create(path)
if err != nil {
log.Fatal(err)
}
return f
}

7
go.work Normal file
View File

@@ -0,0 +1,7 @@
go 1.24.3
use (
./parser
./registry
./renderer
)

97
parser/blocks.go Normal file
View File

@@ -0,0 +1,97 @@
package parser
// BlockType classifies a span of a Marka template.
type BlockType string

const (
	BlockData     BlockType = "data"     // a "{ ... }" capture, braces included
	BlockMatching BlockType = "matching" // literal template text outside data blocks
)

// Block is a half-open [Start, End) byte span of the template source.
type Block struct {
	Type       BlockType
	Start, End int // byte offsets [Start, End)
	src        *string
}

// GetContent returns the text the block spans, or "" for an invalid span.
func (b Block) GetContent() string {
	if b.src == nil || b.Start < 0 || b.End > len(*b.src) || b.Start > b.End {
		return ""
	}
	return (*b.src)[b.Start:b.End]
}

// ExtractBlocks scans the template once, emitting:
//   - data blocks: the full "{ ... }" spans (nesting-aware, braces included)
//   - matching blocks: the literal text between data blocks
//
// BUG FIX: the previous implementation pre-seeded the brace depth to 1 when
// the source began with '{' and then counted that same brace again inside the
// loop, so templates starting with a data block were never split correctly.
// It also let a stray '}' in literal text drive the depth negative, breaking
// every subsequent match; stray closers are now ignored.
//
// NOTE: any literal text after the final data block (and any unterminated
// data block) is intentionally dropped — callers and the package tests rely
// on this trailing-drop behavior.
func ExtractBlocks(src string) []Block {
	const (
		opening = '{'
		closing = '}'
	)
	var (
		out   []Block
		depth int // current brace nesting; 0 means literal text
		start int // start offset of the block being accumulated
	)
	// flush appends the [start, end) span as a block, skipping empty spans so
	// adjacent "}{" never produces zero-width matching blocks.
	flush := func(end int, t BlockType) {
		if end > start {
			out = append(out, Block{Type: t, Start: start, End: end, src: &src})
		}
	}
	for i := 0; i < len(src); i++ {
		switch src[i] {
		case opening:
			if depth == 0 {
				flush(i, BlockMatching)
				start = i
			}
			depth++
		case closing:
			if depth == 0 {
				break // stray '}' inside literal text: ignore
			}
			depth--
			if depth == 0 {
				flush(i+1, BlockData)
				start = i + 1
			}
		}
	}
	// Deliberately no trailing flush — see the trailing-drop note above.
	return out
}

51
parser/blocks_test.go Normal file
View File

@@ -0,0 +1,51 @@
package parser
import (
"os"
"path/filepath"
"strings"
"testing"
)
func readFile(t *testing.T, fileName string) string {
path := filepath.Join("testdata", fileName)
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("failed to read test data file: %v", err)
}
return string(data)
}
// TestExtractBlocks verifies that recipe.schema.md splits into the expected
// alternating sequence of matching and data blocks. Newlines in block
// contents are visualized as the two characters `\n` before comparing.
func TestExtractBlocks(t *testing.T) {
	src := readFile(t, "recipe.schema.md")
	type expectation struct {
		Type    BlockType
		Content string
	}
	expected := []expectation{
		{BlockMatching, "---\\n"},
		{BlockData, "{ . }"},
		{BlockMatching, "\\n---\\n\\n# "},
		{BlockData, "{ name | text,required }"},
		{BlockMatching, "\\n\\n"},
		{BlockData, "{ description | text,optional }"},
		{BlockMatching, "\\n\\n## Ingredients\\n"},
		{BlockData, "{\\n path: recipeIngredient\\n codec: list\\n required: true\\n item:\\n template: \"- { . }\"\\n}"},
		{BlockMatching, "\\n\\n## Steps\\n"},
		{BlockData, "{\\n path: recipeInstructions\\n codec: list\\n required: true\\n item:\\n template: \"{ @index }. { . }\"\\n}"},
	}
	got := ExtractBlocks(src)
	if len(got) != len(expected) {
		t.Fatalf("expected %d blocks, got %d", len(expected), len(got))
	}
	for i := range got {
		want := expected[i]
		content := strings.ReplaceAll(got[i].GetContent(), "\n", "\\n")
		if got[i].Type != want.Type || content != want.Content {
			t.Errorf("Block %d: expected %v, got Type: %v, Start: %d, End: %d, Content: %s", i, want, got[i].Type, got[i].Start, got[i].End, content)
		}
	}
}

5
parser/go.mod Normal file
View File

@@ -0,0 +1,5 @@
module git.max-richter.dev/max/marka/parser
go 1.24.3
require github.com/agext/levenshtein v1.2.3

2
parser/go.sum Normal file
View File

@@ -0,0 +1,2 @@
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=

103
parser/matcher.go Normal file
View File

@@ -0,0 +1,103 @@
package parser
import (
"math"
"github.com/agext/levenshtein"
)
// MatchBlock records where a template Block landed in a concrete markdown
// document, as a half-open [Start, End) byte span of that document.
type MatchBlock struct {
	Start, End int
	Block      Block
	src        *string
}

// GetContent returns the matched text from the source document, or "" when
// the span is invalid or no source is attached.
func (m MatchBlock) GetContent() string {
	if m.src == nil {
		return ""
	}
	doc := *m.src
	if m.Start < 0 || m.End > len(doc) || m.Start > m.End {
		return ""
	}
	return doc[m.Start:m.End]
}
// MatchBlocksFuzzy finds anchor positions for all BlockMatching blocks using
// Levenshtein distance (tolerant matching), then returns ONLY the BlockData
// segments as gaps between those anchors.
//
// BUG FIX: the per-anchor search previously hardcoded a tolerance of 0.3,
// silently ignoring the caller-supplied maxDist; the parameter is now honored.
// An empty blocks slice no longer panics on the trailing-block lookup.
func MatchBlocksFuzzy(markdown string, blocks []Block, maxDist float64) []MatchBlock {
	if len(blocks) == 0 {
		return nil
	}
	var out []MatchBlock
	lastIndex := 0
	for i, b := range blocks {
		if b.Type != BlockMatching {
			continue
		}
		start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), maxDist)
		if end == -1 {
			// Anchor not found; keep scanning from the previous position.
			continue
		}
		// The gap between the previous anchor's end and this anchor's start
		// is the captured text for the preceding data block, if any.
		if i > 0 && blocks[i-1].Type == BlockData {
			out = append(out, MatchBlock{
				Start: lastIndex,
				End:   start,
				Block: blocks[i-1],
				src:   &markdown,
			})
		}
		lastIndex = end
	}
	// A trailing data block captures everything after the final anchor.
	if last := blocks[len(blocks)-1]; last.Type == BlockData {
		out = append(out, MatchBlock{
			Start: lastIndex,
			End:   len(markdown),
			Block: last,
			src:   &markdown,
		})
	}
	return out
}
// FuzzyFind scans haystack, starting at byte offset from, for the window most
// similar to needle under a normalized Levenshtein distance. It returns the
// [start, end) byte offsets of the best window, or (-1, -1) when no window
// scores within maxDist (plus a fixed slack of 0.01).
//
// NOTE(review): windows are sliced by byte offset; multi-byte UTF-8 input may
// be cut mid-rune before being handed to the distance function — confirm the
// templates/documents are expected to be ASCII-safe at match boundaries.
func FuzzyFind(haystack string, from int, needle string, maxDist float64) (start int, end int) {
	bestStart, bestEnd, bestDist := -1, -1, math.MaxFloat64
	needleLen := len(needle)
	// Candidate window lengths are bounded around the needle length, so only
	// spans within roughly maxDist of the needle's size are scored.
	minWindow := max(1, needleLen-int(float64(needleLen)*maxDist)-1)
	maxWindow := needleLen + int(float64(needleLen)*maxDist) + 1
	for i := from; i < len(haystack); i++ {
		for windowSize := minWindow; windowSize <= maxWindow && i+windowSize <= len(haystack); windowSize++ {
			sub := haystack[i : i+windowSize]
			dist := levenshtein.Distance(sub, needle, nil)
			maxLen := max(needleLen, windowSize)
			// Normalized edit distance plus a small length-mismatch penalty so
			// windows of exactly the needle's length win ties.
			norm := float64(dist)/float64(maxLen) + float64(abs(windowSize-needleLen))*0.01/float64(maxLen)
			if norm < bestDist {
				bestStart, bestEnd, bestDist = i, i+windowSize, norm
			}
		}
		// Early exit once a near-perfect window has been seen; later windows
		// could only tie, not improve, for practical purposes.
		if bestDist <= 0.05 {
			break
		}
	}
	// Accept the best window only if it is within the caller's tolerance
	// (the +0.01 absorbs the length-penalty term above).
	if bestStart >= 0 && bestDist <= maxDist+0.01 {
		return bestStart, bestEnd
	}
	return -1, -1
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// max returns the larger of a and b.
// NOTE: this shadows the Go 1.21+ builtin max; it is kept so the package
// remains explicit, but it could be removed once callers use the builtin.
func max(a, b int) int {
	if a < b {
		return b
	}
	return a
}

83
parser/matcher_test.go Normal file
View File

@@ -0,0 +1,83 @@
package parser_test
import (
"os"
"path/filepath"
"testing"
"git.max-richter.dev/max/marka/parser"
)
func readFile(t *testing.T, fileName string) string {
path := filepath.Join("testdata", fileName)
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("failed to read test data file: %v", err)
}
return string(data)
}
// TestFuzzyFindAll checks that FuzzyFind locates spans in baguette.md even
// when the needle contains typos, and that the StartIndex offset makes the
// search skip an earlier identical occurrence.
func TestFuzzyFindAll(t *testing.T) {
	recipeMd := readFile(t, "baguette.md")
	tests := []struct {
		Needle                 string
		Start, End, StartIndex int
	}{
		// Exact and increasingly misspelled forms of the same heading must all
		// resolve to the same byte span.
		{StartIndex: 0, Needle: "# Ingredients\n", Start: 72, End: 86},
		{StartIndex: 0, Needle: "# Ingrdients\n", Start: 72, End: 86},
		{StartIndex: 0, Needle: "# Inrdients\n", Start: 72, End: 86},
		{StartIndex: 0, Needle: "---\n", Start: 0, End: 4},
		// Searching from offset 4 must find the closing front-matter fence,
		// not the opening one.
		{StartIndex: 4, Needle: "---\n", Start: 24, End: 28},
		{StartIndex: 0, Needle: "# Steps\n", Start: 111, End: 119},
		{StartIndex: 0, Needle: "# Stps\n", Start: 111, End: 119},
		{StartIndex: 0, Needle: "# Step\n", Start: 111, End: 119},
	}
	for _, test := range tests {
		start, end := parser.FuzzyFind(recipeMd, test.StartIndex, test.Needle, 0.3) // maxDist 0.3 tolerates ~30% error
		if start != test.Start || end != test.End {
			t.Errorf("Start or end do not match: Needle=%q Start=%d/%d End=%d/%d", test.Needle, test.Start, start, test.End, end)
		}
	}
}
// TestFuzzyBlockMatch runs the full pipeline: extract blocks from the schema
// template, fuzzily match them against a concrete markdown document, and
// compare each captured data segment to its expected text.
func TestFuzzyBlockMatch(t *testing.T) {
	recipeMd := readFile(t, "baguette.md")
	schemaMd := readFile(t, "recipe.schema.md")
	blocks := parser.ExtractBlocks(schemaMd)
	matches := parser.MatchBlocksFuzzy(recipeMd, blocks, 0.3)
	// Expected data-block captures, in template order.
	expected := []struct {
		value string
	}{
		{value: "author: Max Richter"},
		{value: "Baguette"},
		{value: "My favourite baguette recipe"},
		{value: "- Flour\n- Water\n- Salt"},
		{value: "1. Mix Flour Water and Salt\n2. Bake the bread"},
	}
	// BUG FIX: the original only flagged surplus matches, so a run producing
	// too FEW matches passed silently. Compare lengths in both directions.
	if len(matches) != len(expected) {
		t.Fatalf("expected %d matches, got %d", len(expected), len(matches))
	}
	for i, m := range matches {
		if expected[i].value != m.GetContent() {
			t.Errorf("Match %d did not match expected: %q", i, m.GetContent())
		}
	}
}

17
parser/parser.go Normal file
View File

@@ -0,0 +1,17 @@
// Package parser provides functions for parsing Markdown templates into
// structured JSON objects that conform to a JSON Schema.
package parser
// ParseFile parses markdownContent against a Marka template and returns the
// extracted data as a generic JSON-style map.
//
// NOTE(review): currently a stub — it always returns an empty map and a nil
// error. The cmd/marka entrypoint calls parser.ParseFile with three arguments
// (template, input, schema); confirm which signature is intended before
// implementing.
func ParseFile(markdownContent string) (map[string]any, error) {
	// _schema, err := registry.GetTemplate("Recipe")
	// if err != nil {
	// return nil, fmt.Errorf("could not get schema: %w", err)
	// }
	// Idea is to split the template into blocks, either "matching" blocks which are simple strings.
	// Or "data" blocks which match the content. Then i want to soft match the "matching" blocks and "data" blocks to the template.
	// The "matching" blocks should soft match with a levenshtein distance
	return map[string]any{}, nil
}

43
parser/parser_test.go Normal file
View File

@@ -0,0 +1,43 @@
package parser_test
import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"testing"
"git.max-richter.dev/max/marka/parser"
)
// TestParseRecipe_Golden compares ParseFile output for
// testdata/recipe_salad/input.md against the golden file output.json.
func TestParseRecipe_Golden(t *testing.T) {
	td := filepath.Join("testdata", "recipe_salad")
	input := filepath.Join(td, "input.md")
	output := filepath.Join(td, "output.json")
	inputContent, err := os.ReadFile(input)
	if err != nil {
		t.Fatalf("read input.md: %v", err)
	}
	got, err := parser.ParseFile(string(inputContent))
	if err != nil {
		t.Fatalf("ParseFile: %v", err)
	}
	var want map[string]any
	b, err := os.ReadFile(output)
	if err != nil {
		// BUG FIX: message previously referred to "expected.json", a file
		// that does not exist in testdata.
		t.Fatalf("read output.json: %v", err)
	}
	if err := json.Unmarshal(b, &want); err != nil {
		t.Fatalf("unmarshal output.json: %v", err)
	}
	// Deep structural compare; on mismatch, show both sides pretty-printed.
	if !reflect.DeepEqual(want, got) {
		gb, _ := json.MarshalIndent(got, "", " ")
		wb, _ := json.MarshalIndent(want, "", " ")
		t.Fatalf("parsed JSON mismatch\n--- got ---\n%s\n--- want ---\n%s", string(gb), string(wb))
	}
}

16
parser/testdata/baguette.md vendored Normal file
View File

@@ -0,0 +1,16 @@
---
author: Max Richter
---
# Baguette
My favourite baguette recipe
## Ingredients
- Flour
- Water
- Salt
## Steps
1. Mix Flour Water and Salt
2. Bake the bread

25
parser/testdata/recipe.schema.md vendored Normal file
View File

@@ -0,0 +1,25 @@
---
{ . }
---
# { name | text,required }
{ description | text,optional }
## Ingredients
{
path: recipeIngredient
codec: list
required: true
item:
template: "- { . }"
}
## Steps
{
path: recipeInstructions
codec: list
required: true
item:
template: "{ @index }. { . }"
}

25
parser/testdata/recipe_salad/input.md vendored Normal file
View File

@@ -0,0 +1,25 @@
---
@type: Recipe
image: https://example.com/salad.jpg
author: Alex Chef
datePublished: 2025-08-12
prepTime: PT10M
cookTime: PT0M
recipeYield: 2 servings
---
# Simple Salad
A quick green salad.
## Ingredients
- 100 g lettuce
- 5 cherry tomatoes
- 1 tbsp olive oil
- Pinch of salt
## Steps
1. Wash and dry the lettuce.
2. Halve the cherry tomatoes.
3. Toss with olive oil and salt.

View File

@@ -0,0 +1,26 @@
{
"@context": "https://schema.org/",
"@type": "Recipe",
"name": "Simple Salad",
"image": "https://example.com/salad.jpg",
"author": {
"@type": "Person",
"name": "Alex Chef"
},
"datePublished": "2025-08-12",
"description": "A quick green salad.",
"prepTime": "PT10M",
"cookTime": "PT0M",
"recipeYield": "2 servings",
"recipeIngredient": [
"100 g lettuce",
"5 cherry tomatoes",
"1 tbsp olive oil",
"Pinch of salt"
],
"recipeInstructions": [
"Wash and dry the lettuce.",
"Halve the cherry tomatoes.",
"Toss with olive oil and salt."
]
}

View File

@@ -0,0 +1,6 @@
description: "Core capture aliases for Marka"
patterns:
text: ".+"
word: "\\S+"
num: "(?:\\d+(?:[.,]\\d+)?(?:\\s?\\d+/\\d+)?)" # 3 | 1.5 | 1 1/2
indexMarker: "\\d+[.)]" # 1. / 1)

3
registry/go.mod Normal file
View File

@@ -0,0 +1,3 @@
module git.max-richter.dev/max/marka/registry
go 1.24.3

59
registry/registry.go Normal file
View File

@@ -0,0 +1,59 @@
// Package registry provides functionality for managing and accessing embedded file systems and directories.
package registry
import (
"embed"
"io"
"io/fs"
"os"
)
type Source interface {
Open(name string) (fs.File, error)
ReadFile(name string) ([]byte, error)
ReadDir(name string) ([]fs.DirEntry, error)
}
type src struct{ fsys fs.FS }
func (s src) Open(p string) (fs.File, error) { return s.fsys.Open(p) }
func (s src) ReadFile(p string) ([]byte, error) { return fs.ReadFile(s.fsys, p) }
func (s src) ReadDir(p string) ([]fs.DirEntry, error) { return fs.ReadDir(s.fsys, p) }
func FromDir(path string) Source { return src{fsys: os.DirFS(path)} }
// Embedded Marka template files (templates/*.marka).
//go:embed templates/*
var templates embed.FS

// Embedded JSON Schema documents (schema.org subset).
//go:embed schema-org/*
var schemas embed.FS

// Embedded capture-pattern alias definitions.
//go:embed aliases/*
var aliases embed.FS
// GetTemplates returns a Source over the embedded template files.
func GetTemplates() Source {
	return src{fsys: templates}
}
// GetTemplate reads the embedded template "templates/<name>.marka" and
// returns its contents as a string.
func GetTemplate(name string) (string, error) {
	file, err := templates.Open("templates/" + name + ".marka")
	if err != nil {
		return "", err
	}
	defer file.Close()

	contents, err := io.ReadAll(file)
	if err != nil {
		return "", err
	}
	return string(contents), nil
}
// GetSchemas returns a Source over the embedded JSON Schema files.
func GetSchemas() Source {
	return src{fsys: schemas}
}

// GetAliases returns a Source over the embedded alias definitions.
func GetAliases() Source {
	return src{fsys: aliases}
}

View File

@@ -0,0 +1,38 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://schema.org/ImageObject",
"title": "ImageObject",
"description": "Minimal subset of schema.org/ImageObject for use in Recipe image property",
"type": "object",
"required": [
"@type",
"url"
],
"properties": {
"@type": {
"const": "ImageObject"
},
"url": {
"$ref": "url.json"
},
"caption": {
"type": "string",
"description": "A caption for the image."
},
"width": {
"type": [
"integer",
"string"
],
"description": "Width of the image in pixels or as a string with unit."
},
"height": {
"type": [
"integer",
"string"
],
"description": "Height of the image in pixels or as a string with unit."
}
},
"additionalProperties": true
}

View File

@@ -0,0 +1,103 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://schema.org/Recipe",
"title": "Recipe",
"type": "object",
"required": [
"@context",
"@type",
"name",
"image",
"recipeIngredient",
"recipeInstructions"
],
"properties": {
"@context": {
"const": "https://schema.org/"
},
"@type": {
"const": "Recipe"
},
"name": {
"type": "string"
},
"image": {
"oneOf": [
{
"$ref": "Url.json"
},
{
"type": "array",
"items": {
"$ref": "Url.json"
},
"minItems": 1
},
{
"$ref": "ImageObject.json"
},
{
"type": "array",
"items": {
"$ref": "ImageObject.json"
},
"minItems": 1
}
]
},
"description": {
"type": "string"
},
"author": {
"type": "object",
"required": [
"@type",
"name"
],
"properties": {
"@type": {
"const": "Person"
},
"name": {
"type": "string"
}
},
"additionalProperties": false
},
"datePublished": {
"type": "string",
"format": "date"
},
"prepTime": {
"type": "string",
"pattern": "^P(T?\\d+H?\\d*M?\\d*S?)$"
},
"cookTime": {
"type": "string",
"pattern": "^P(T?\\d+H?\\d*M?\\d*S?)$"
},
"recipeYield": {
"type": "string"
},
"recipeIngredient": {
"type": "array",
"items": {
"type": "string"
}
},
"recipeInstructions": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
}
},
"additionalProperties": true
}

View File

@@ -0,0 +1,12 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://schema.org/Url",
"title": "URL",
"description": "A URL as defined by schema.org/URL",
"type": "string",
"format": "uri",
"examples": [
"https://example.com",
"http://www.example.org/image.png"
]
}

View File

@@ -0,0 +1,60 @@
---
{
path: .
codec: yaml
required: true
assert:
"@context": https://schema.org/
"@type": Recipe
fields:
- path: name
codec: text
required: true
- path: image
codec: text
required: true
- path: author.@type
codec: const
value: Person
- path: author.name
codec: text
required: true
- path: datePublished
codec: text
optional: true
- path: description
codec: text
optional: true
- path: prepTime
codec: text
optional: true
- path: cookTime
codec: text
optional: true
- path: recipeYield
codec: text
optional: true
}
---
# { name | text,required }
{ description | text,optional }
## Ingredients
{
path: recipeIngredient
codec: list
required: true
item:
template: "- { . }"
}
## Steps
{
path: recipeInstructions
codec: list
required: true
item:
template: "{ @index }. { . }"
}

3
renderer/go.mod Normal file
View File

@@ -0,0 +1,3 @@
module git.max-richter.dev/max/marka/renderer
go 1.24.3

9
renderer/renderer.go Normal file
View File

@@ -0,0 +1,9 @@
package renderer
// RenderFile renders the JSON document at jsonPath through the Marka template
// at templatePath, producing Markdown bytes.
//
// NOTE(review): currently a stub — it always returns an empty byte slice and
// a nil error.
func RenderFile(templatePath, jsonPath string) ([]byte, error) {
	// TODO:
	// 1) load aliases + template
	// 2) validate JSON against schema (optional)
	// 3) apply codecs to produce Markdown
	return []byte{}, nil
}