big tings

This commit is contained in:
Max Richter
2025-08-17 15:16:17 +02:00
parent 40b9be887d
commit c687eff53d
958 changed files with 32279 additions and 704 deletions

View File

@@ -1,74 +0,0 @@
package parser
import (
"fmt"
"git.max-richter.dev/max/marka/parser/blocks"
)
// ExtractBlocks scans the template once and splits it into an ordered list of
// blocks:
//   - data blocks: a top-level "{" ... "}" span, including both braces
//   - matching blocks: the literal text between data blocks
//
// Braces nested inside a data block (for example an inline item template)
// belong to the enclosing data block. Empty gaps produce no block at all, so a
// template may start with "{" and two data blocks may directly follow each
// other. Text after the final closing brace and a data block that is never
// closed are not emitted.
//
// An error is returned when blocks.ParseTemplateBlock rejects a block.
func ExtractBlocks(template string) ([]blocks.TemplateBlock, error) {
	const (
		opening = '{'
		closing = '}'
	)

	var (
		out   []blocks.TemplateBlock
		depth int // current brace nesting level
		start int // byte offset at which the pending block begins
	)

	// emit parses template[start:end] as blockType and appends the result.
	// Zero-length spans are skipped so an empty string is never handed to the
	// block parser (which would reject it as an invalid data block).
	emit := func(end int, blockType blocks.BlockType) error {
		if end <= start {
			return nil
		}
		block, err := blocks.ParseTemplateBlock(template[start:end], blockType)
		if err != nil {
			return fmt.Errorf("Failed to parse block: %w", err)
		}
		out = append(out, block)
		return nil
	}

	for i, r := range template {
		switch r {
		case opening:
			// Only a brace opened at the top level starts a new data block;
			// everything collected so far is the preceding matching block.
			if depth == 0 {
				if err := emit(i, blocks.MatchingBlock); err != nil {
					return nil, err
				}
				start = i
			}
			depth++
		case closing:
			depth--
			// Back at the top level: the data block ends with this brace.
			if depth == 0 {
				if err := emit(i+1, blocks.DataBlock); err != nil {
					return nil, err
				}
				start = i + 1
			}
		}
	}

	return out, nil
}

View File

@@ -1,5 +0,0 @@
package blocks
// ParseListBlock decodes input for a block that uses the list codec.
//
// It is currently a placeholder: whatever the input, it returns an empty key,
// a nil value and a nil error.
//
// The error result is named err rather than error so that the predeclared
// error type is not shadowed inside the method.
func (b TemplateBlock) ParseListBlock(input string) (key string, value any, err error) {
	return "", nil, nil
}

View File

@@ -1,18 +0,0 @@
package blocks
import (
"fmt"
"go.yaml.in/yaml/v4"
)
// ParseYamlBlock decodes input as a YAML mapping.
//
// On success it returns the block's path as key and the decoded mapping as
// value, mirroring what Parse returns for text blocks (path plus decoded
// content). If the input is not valid YAML for a mapping, an error wrapping
// the decoder's error is returned together with an empty key and nil value.
func (b TemplateBlock) ParseYamlBlock(input string) (key string, value any, err error) {
	res := make(map[string]any)
	if err := yaml.Unmarshal([]byte(input), &res); err != nil {
		return "", nil, fmt.Errorf("failed to parse yaml: %w", err)
	}
	return b.Path, res, nil
}

View File

@@ -1,80 +0,0 @@
package blocks
import (
"strings"
)
// TemplateType classifies the syntax a data block is written in.
type TemplateType int

const (
	// InvalidTemplate marks input that matches neither supported syntax.
	InvalidTemplate TemplateType = iota
	// ShortTemplate is the single-line form, e.g. { name | text,required }.
	ShortTemplate
	// ExtendedTemplate is the multi-line form, e.g.
	//
	//	{
	//	  path: name
	//	  codec: text
	//	  required: true
	//	}
	ExtendedTemplate
)

// DetectTemplateType reports which syntax tmpl uses. Leading and trailing
// whitespace is ignored.
//
//   - ShortTemplate: the text has no line break, starts with "{" and ends with "}".
//   - ExtendedTemplate: the text has a line break and contains "path:" or "codec:".
//   - InvalidTemplate: anything else.
func DetectTemplateType(tmpl string) TemplateType {
	body := strings.TrimSpace(tmpl)
	multiline := strings.Contains(body, "\n")

	switch {
	case !multiline && strings.HasPrefix(body, "{") && strings.HasSuffix(body, "}"):
		return ShortTemplate
	case multiline && (strings.Contains(body, "path:") || strings.Contains(body, "codec:")):
		return ExtendedTemplate
	default:
		return InvalidTemplate
	}
}
// BlockType tells whether a template block is a data block or literal text
// used for matching.
type BlockType string
const (
DataBlock BlockType = "data" // content between lines "{" and "}"
MatchingBlock BlockType = "matching" // everything outside data blocks
)
// BlockField describes one entry of a data block's "fields" list.
type BlockField struct {
// Path is the field's path as written in the template.
Path string
// CodecType is the codec declared for the field.
CodecType CodecType
// Required is the field's "required" flag.
Required bool
}
// TemplateBlock is one segment of a template: either a data block or a
// matching block (see BlockType).
type TemplateBlock struct {
// Type distinguishes data blocks from matching blocks.
Type BlockType
// Path is the path declared by a data block.
Path string
// Codec selects how Parse decodes the matched input.
Codec CodecType
// Required is the data block's "required" flag.
Required bool
// Fields holds the entries of a data block's "fields" list, if any.
Fields []BlockField
// content is the template text the block was built from; read it via GetContent.
content string
}
// GetContent returns the block's unexported content field, i.e. the template
// text that the parsers in this package stored when building the block.
func (b TemplateBlock) GetContent() string {
return b.content
}
// Parse decodes input according to the block's codec and returns the key the
// value belongs to together with the decoded value.
//
// Text blocks return the input unchanged under the block's path. Yaml and list
// blocks are delegated to ParseYamlBlock and ParseListBlock. Every other codec
// yields the block's path with an empty string value and a nil error.
func (b *TemplateBlock) Parse(input string) (key string, value any, err error) {
	if b.Codec == CodecText {
		return b.Path, input, nil
	}
	if b.Codec == CodecYaml {
		return b.ParseYamlBlock(input)
	}
	if b.Codec == CodecList {
		return b.ParseListBlock(input)
	}
	return b.Path, "", nil
}

View File

@@ -1,30 +0,0 @@
package blocks
import "fmt"
// CodecType names the codec used to encode/render a value.
type CodecType string

// Codecs that a template may declare.
const (
	CodecText   CodecType = "text"
	CodecNumber CodecType = "number"
	CodecYaml   CodecType = "yaml"
	CodecList   CodecType = "list"
	CodecConst  CodecType = "const"
)

// parseCodecType converts the codec name written in a template into its
// CodecType. The name must match exactly (no trimming, case-sensitive).
// For an unknown name it returns CodecText together with an error.
func parseCodecType(input string) (CodecType, error) {
	switch codec := CodecType(input); codec {
	case CodecNumber, CodecYaml, CodecList, CodecText, CodecConst:
		return codec, nil
	default:
		return CodecText, fmt.Errorf("unknown codec: '%s'", input)
	}
}

View File

@@ -1,141 +0,0 @@
package blocks
import (
"fmt"
"strings"
"go.yaml.in/yaml/v4"
)
// cleanTemplate strips surrounding whitespace from input and then removes at
// most one leading "{" and one trailing "}". Whitespace inside the braces is
// kept.
func cleanTemplate(input string) string {
	trimmed := strings.TrimSpace(input)
	return strings.TrimSuffix(strings.TrimPrefix(trimmed, "{"), "}")
}
// parseShortTemplate parses the single-line form "{ path | option,option }".
//
// The text before the first "|" is the path; the text between the first and
// an optional second "|" is a comma-separated option list. Recognised options
// are "required" and any codec name accepted by parseCodecType; when several
// codecs are listed the last one wins. The codec defaults to text. Options
// that are neither are ignored.
//
// The returned error is always nil; it is kept so the signature matches
// parseYamlTemplate.
func parseShortTemplate(input string) (TemplateBlock, error) {
	// strings.Split always yields at least one element, so parts[0] is safe.
	parts := strings.Split(cleanTemplate(input), "|")

	block := TemplateBlock{
		Type:    DataBlock,
		Path:    strings.TrimSpace(parts[0]),
		Codec:   CodecText,
		content: input,
	}
	if len(parts) == 1 {
		return block, nil
	}

	for _, option := range strings.Split(parts[1], ",") {
		option = strings.TrimSpace(option)
		if option == "required" {
			block.Required = true
			continue
		}
		// Reuse the shared codec table so every codec is available in the
		// short form, not just "number".
		if codec, err := parseCodecType(option); err == nil {
			block.Codec = codec
		}
	}

	return block, nil
}
// yamlBlock is the YAML shape of an extended (multi-line) data block as
// decoded by parseYamlTemplate.
type yamlBlock struct {
// Path is mandatory; parseYamlTemplate rejects a block without it.
Path string `yaml:"path"`
// Codec is the codec name; parseYamlTemplate treats an empty value as "text".
Codec string `yaml:"codec"`
Required bool `yaml:"required,omitempty"`
Fields []yamlField `yaml:"fields"`
// Item carries the optional "item.template" entry.
Item *struct {
Template string `yaml:"template,omitempty"`
} `yaml:"item,omitempty"`
Template string `yaml:"template,omitempty"`
}
// yamlField is the YAML shape of one entry in a data block's "fields" list.
type yamlField struct {
// Path is mandatory; parseYamlTemplate rejects a field without it.
Path string `yaml:"path"`
// Value is the optional "value" entry of the field.
Value any `yaml:"value,omitempty"`
// Codec is the codec name; parseYamlTemplate treats an empty value as "text".
Codec string `yaml:"codec"`
Required bool `yaml:"required"`
}
// parseYamlTemplate parses the extended, multi-line form of a data block.
//
// The text between the outer braces is decoded as YAML into a yamlBlock;
// unknown keys are rejected. A top-level "path" is mandatory, and so is a
// "path" on every field. A missing codec defaults to "text" for both the block
// and its fields. The block's "required" flag is carried over to the result.
//
// It returns an error when the YAML cannot be decoded, when a path is missing,
// or when a codec name is unknown.
func parseYamlTemplate(input string) (block TemplateBlock, err error) {
	var blk yamlBlock

	dec := yaml.NewDecoder(strings.NewReader(cleanTemplate(input)))
	dec.KnownFields(true)
	if err := dec.Decode(&blk); err != nil {
		return block, err
	}

	if blk.Path == "" {
		return block, fmt.Errorf("missing top-level 'path'")
	}
	if blk.Codec == "" {
		blk.Codec = "text"
	}
	codec, err := parseCodecType(blk.Codec)
	if err != nil {
		return block, fmt.Errorf("failed to parse codec: %w", err)
	}

	var fields []BlockField
	for _, field := range blk.Fields {
		if field.Path == "" {
			return block, fmt.Errorf("failed to parse field: %v", field)
		}
		if field.Codec == "" {
			field.Codec = "text"
		}
		fieldCodec, err := parseCodecType(field.Codec)
		if err != nil {
			return block, fmt.Errorf("failed to parse codec: %w", err)
		}
		fields = append(fields, BlockField{
			Path:      field.Path,
			CodecType: fieldCodec,
			Required:  field.Required,
		})
	}

	return TemplateBlock{
		Type:  DataBlock,
		Path:  blk.Path,
		Codec: codec,
		// Previously dropped: "required: true" on the block had no effect.
		Required: blk.Required,
		Fields:   fields,
		content:  input,
	}, nil
}
// ParseTemplateBlock turns a raw template segment into a TemplateBlock.
//
// A matching block is wrapped as-is. A data block is parsed with the short or
// the extended parser, depending on what DetectTemplateType reports; if it
// reports neither, an "Invalid Template" error is returned.
func ParseTemplateBlock(template string, blockType BlockType) (block TemplateBlock, err error) {
	if blockType != MatchingBlock {
		kind := DetectTemplateType(template)
		if kind == ShortTemplate {
			return parseShortTemplate(template)
		}
		if kind == ExtendedTemplate {
			return parseYamlTemplate(template)
		}
		return block, fmt.Errorf("Invalid Template")
	}

	literal := TemplateBlock{
		Type:    MatchingBlock,
		content: template,
	}
	return literal, nil
}

View File

@@ -1,55 +0,0 @@
package parser_test
import (
"testing"
"git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/parser/blocks"
"git.max-richter.dev/max/marka/registry"
)
// TestExtractBlocks loads the "recipe" template from the registry, splits it
// with parser.ExtractBlocks and checks that the resulting blocks alternate
// between matching and data blocks with exactly the expected type and content.
func TestExtractBlocks(t *testing.T) {
src, err := registry.GetTemplate("recipe")
if err != nil {
t.Errorf("Failed to extract blocks: %s", err.Error())
t.FailNow()
}
templateBlocks, err := parser.ExtractBlocks(src)
if err != nil {
t.Errorf("Failed to extract blocks: %s", err.Error())
t.FailNow()
}
// Expected blocks in template order; Content is compared verbatim against
// TemplateBlock.GetContent.
expected := []struct {
Type blocks.BlockType
Content string
}{
{blocks.MatchingBlock, "---\n"},
{blocks.DataBlock, "{\n path: .\n codec: yaml\n fields:\n - path: name\n codec: text\n required: true\n - path: image\n codec: text\n required: true\n - path: author.@type\n codec: const\n value: Person\n - path: author.name\n codec: text\n - path: datePublished\n codec: text\n - path: description\n codec: text\n - path: prepTime\n codec: text\n - path: cookTime\n codec: text\n - path: recipeYield\n codec: text\n}"},
{blocks.MatchingBlock, "\n---\n\n# "},
{blocks.DataBlock, "{ name | text,required }"},
{blocks.MatchingBlock, "\n\n"},
{blocks.DataBlock, "{ description | text }"},
{blocks.MatchingBlock, "\n\n## Ingredients\n"},
{blocks.DataBlock, "{\n path: recipeIngredient\n codec: list\n required: true\n item:\n template: \"- { . }\"\n}"},
{blocks.MatchingBlock, "\n\n## Steps\n"},
{blocks.DataBlock, "{\n path: recipeInstructions\n codec: list\n required: true\n item:\n template: \"{ @index }. { . }\"\n}"},
}
// Abort on a count mismatch so the indexed comparison below cannot go out of range.
if len(templateBlocks) != len(expected) {
t.Fatalf("expected %d blocks, got %d", len(expected), len(templateBlocks))
}
for i, b := range templateBlocks {
exp := expected[i]
if b.Type != exp.Type {
t.Errorf("Block#%d Type '%s' did not match expected type '%s'", i, b.Type, exp.Type)
}
content := b.GetContent()
if content != exp.Content {
t.Errorf("Block#%d Content '%s' did not match expected Content: '%s'", i, content, exp.Content)
}
}
}

View File

@@ -0,0 +1,41 @@
// Package decoders contains functions for parsing template.Block to a string.
package decoders
import (
"fmt"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/parser/utils"
"git.max-richter.dev/max/marka/template"
)
// ParseBlock decodes input with the decoder selected by block.Codec.
//
// Text is returned unchanged, yaml is handed to Yaml and list to List. Any
// other codec results in an "unknown codec" error.
func ParseBlock(input string, block template.Block) (any, error) {
	if block.Codec == template.CodecText {
		return input, nil
	}
	if block.Codec == template.CodecYaml {
		return Yaml(input, block)
	}
	if block.Codec == template.CodecList {
		return List(input, block)
	}
	return nil, fmt.Errorf("unknown codec: %s", block.Codec)
}
// Parse decodes every matched block with ParseBlock and stores each decoded
// value at the block's path via utils.SetPathValue, returning the accumulated
// result. Blocks whose path is "@index" are skipped. The first decoding error
// aborts the run and is returned wrapped with the offending block's path.
func Parse(matches []matcher.Block) (any, error) {
	var acc any

	for i := range matches {
		match := matches[i]
		path := match.Block.Path
		if path == "@index" {
			continue
		}

		decoded, err := ParseBlock(match.GetContent(), match.Block)
		if err != nil {
			return nil, fmt.Errorf("failed to parse block(%s): %w", path, err)
		}
		acc = utils.SetPathValue(path, decoded, acc)
	}

	return acc, nil
}

View File

@@ -0,0 +1,56 @@
package decoders_test
import (
"encoding/json"
"fmt"
"testing"
"git.max-richter.dev/max/marka/parser/decoders"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/parser/utils"
"git.max-richter.dev/max/marka/registry"
"git.max-richter.dev/max/marka/template"
)
// TestParseBaguette runs the full pipeline (compile the "Recipe" template,
// fuzzy-match it against baguette.md, decode the matches) and compares a few
// well-known keys of the result by their printed representation.
func TestParseBaguette(t *testing.T) {
	recipeMd := utils.ReadTestDataFile(t, "baguette.md")

	templateContent, err := registry.GetTemplate("Recipe")
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	compiled, err := template.CompileTemplate(templateContent)
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	parsed, err := decoders.Parse(matcher.MatchBlocksFuzzy(recipeMd, compiled, 0.3))
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	expected := map[string]any{
		"name":             "Baguette",
		"description":      "My favourite baguette recipe",
		"recipeIngredient": []string{"Flour", "Water", "Salt"},
		"recipeInstructions": []string{
			"Mix Flour Water and Salt",
			"Bake the bread",
		},
	}

	// Debug output only; a marshalling failure merely prints an empty document.
	pretty, _ := json.MarshalIndent(parsed, "", " ")
	fmt.Printf("Parsed: \n%s\n", string(pretty))

	outMap, ok := parsed.(map[string]any)
	if !ok {
		t.Fatalf("expected parsed to be map[string]any, got %T", parsed)
	}

	for key, want := range expected {
		got := outMap[key]
		if fmt.Sprintf("%v", got) == fmt.Sprintf("%v", want) {
			continue
		}
		t.Errorf("Expected %v but got %v", want, got)
	}
}

31
parser/decoders/list.go Normal file
View File

@@ -0,0 +1,31 @@
package decoders
import (
"fmt"
"strings"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/template"
)
// List decodes a list block. The block's ListTemplate is compiled once; then
// every line of input (a single trailing newline is dropped first) is
// fuzzy-matched against it and decoded with Parse. The per-line results are
// returned as a []any in input order.
//
// It returns an error when the item template cannot be compiled or a line
// cannot be decoded.
//
// The error result is named err rather than error so that the predeclared
// error type is not shadowed inside the function.
func List(input string, block template.Block) (value any, err error) {
	blocks, err := template.CompileTemplate(block.ListTemplate)
	if err != nil {
		return nil, fmt.Errorf("cannot extract blocks: %w", err)
	}

	var out []any
	for line := range strings.SplitSeq(strings.TrimSuffix(input, "\n"), "\n") {
		matches := matcher.MatchBlocksFuzzy(line, blocks, 0.3)
		res, err := Parse(matches)
		if err != nil {
			return nil, fmt.Errorf("could not match blocks: %w", err)
		}
		out = append(out, res)
	}

	return out, nil
}

View File

@@ -0,0 +1,90 @@
package decoders_test
import (
"reflect"
"testing"
"git.max-richter.dev/max/marka/parser/decoders"
"git.max-richter.dev/max/marka/template"
)
// TestDecodeListObject checks that an item template with two data blocks
// turns every list line into a map with one entry per block.
func TestDecodeListObject(t *testing.T) {
	block := template.Block{
		Path:         "ingredients",
		Codec:        template.CodecList,
		ListTemplate: "- { amount } { type }",
	}

	parsed, err := decoders.List("- 10g flour\n- 1/2cup water\n- 1tsp salt", block)
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	want := []any{
		map[string]any{"amount": "10g", "type": "flour"},
		map[string]any{"amount": "1/2cup", "type": "water"},
		map[string]any{"amount": "1tsp", "type": "salt"},
	}
	if reflect.DeepEqual(parsed, want) {
		return
	}
	t.Fatalf("unexpected result.\n got: %#v\nwant: %#v", parsed, want)
}
// TestDecodeListString checks that the root path "." makes every list line
// decode to a plain string.
func TestDecodeListString(t *testing.T) {
	block := template.Block{
		Path:         "ingredients",
		Codec:        template.CodecList,
		ListTemplate: "- { . }",
	}

	parsed, err := decoders.List("- flour\n- water\n- salt", block)
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	want := []any{"flour", "water", "salt"}
	if reflect.DeepEqual(parsed, want) {
		return
	}
	t.Fatalf("unexpected result.\n got: %#v\nwant: %#v", parsed, want)
}
// TestDecodeNumberedListString checks that the "@index" block of a numbered
// list is consumed but does not show up in the decoded values.
func TestDecodeNumberedListString(t *testing.T) {
	block := template.Block{
		Path:         "ingredients",
		Codec:        template.CodecList,
		ListTemplate: "{ @index } { . }",
	}

	const input = "1. Wash and dry the lettuce.\n2. Halve the cherry tomatoes.\n3. Toss with olive oil and salt."
	parsed, err := decoders.List(input, block)
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	want := []any{
		"Wash and dry the lettuce.",
		"Halve the cherry tomatoes.",
		"Toss with olive oil and salt.",
	}
	if reflect.DeepEqual(parsed, want) {
		return
	}
	t.Fatalf("unexpected result.\n got: %#v\nwant: %#v", parsed, want)
}

32
parser/decoders/yaml.go Normal file
View File

@@ -0,0 +1,32 @@
package decoders
import (
"fmt"
"git.max-richter.dev/max/marka/parser/utils"
"git.max-richter.dev/max/marka/template"
"go.yaml.in/yaml/v4"
)
// Yaml decodes input as a YAML mapping and builds the result from the fields
// declared on block:
//   - a const field contributes its declared Value (if non-nil), whatever the
//     input contains;
//   - any other field contributes the input's value stored under the key equal
//     to the field's full path, if such a key is present.
//
// Values are placed with utils.SetPathValue. Keys of the input that are not
// declared as fields are dropped. If no field contributes, the result is nil.
//
// It returns an error when input is not valid YAML for a mapping.
//
// The error result is named err rather than error so that the predeclared
// error type is not shadowed inside the function.
func Yaml(input string, block template.Block) (value any, err error) {
	res := make(map[string]any)
	if err := yaml.Unmarshal([]byte(input), &res); err != nil {
		return nil, fmt.Errorf("failed to parse yaml '%q': %w", input, err)
	}

	var out any
	for _, f := range block.Fields {
		if f.CodecType == template.CodecConst {
			if f.Value != nil {
				out = utils.SetPathValue(f.Path, f.Value, out)
			}
			continue
		}
		if v, ok := res[f.Path]; ok {
			out = utils.SetPathValue(f.Path, v, out)
		}
	}

	return out, nil
}

View File

@@ -4,4 +4,7 @@ go 1.24.3
require github.com/agext/levenshtein v1.2.3
require go.yaml.in/yaml/v4 v4.0.0-rc.1 // indirect
require (
github.com/google/go-cmp v0.7.0
go.yaml.in/yaml/v4 v4.0.0-rc.1 // indirect
)

View File

@@ -1,4 +1,6 @@
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
go.yaml.in/yaml/v4 v4.0.0-rc.1 h1:4J1+yLKUIPGexM/Si+9d3pij4hdc7aGO04NhrElqXbY=
go.yaml.in/yaml/v4 v4.0.0-rc.1/go.mod h1:CBdeces52/nUXndfQ5OY8GEQuNR9uEEOJPZj/Xq5IzU=

View File

@@ -2,16 +2,72 @@
// structured JSON objects that conform to a JSON Schema.
package parser
func ParseFile(markdownContent string) (map[string]any, error) {
import (
"fmt"
"strings"
// _schema, err := registry.GetTemplate("Recipe")
// if err != nil {
// return nil, fmt.Errorf("could not get schema: %w", err)
// }
"git.max-richter.dev/max/marka/parser/decoders"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/registry"
"git.max-richter.dev/max/marka/template"
)
// The idea is to split the template into blocks: either "matching" blocks, which are simple strings,
// or "data" blocks, which match the content. Then I want to soft-match the "matching" blocks and "data" blocks to the template.
// The "matching" blocks should be soft-matched using a Levenshtein distance.
func DetectType(markdownContent string) (string, error) {
defaultSchemaContent, err := registry.GetTemplate("_default")
if err != nil {
return "", fmt.Errorf("could not get schema: %w", err)
}
return map[string]any{}, nil
defaultSchema, err := template.CompileTemplate(defaultSchemaContent)
if err != nil {
return "", fmt.Errorf("failed to compile template: %w", err)
}
blocks := matcher.MatchBlocksFuzzy(markdownContent, defaultSchema, 0.3)
result, err := decoders.Parse(blocks)
if err != nil {
return "", fmt.Errorf("failed to parse blocks: %w", err)
}
if result, ok := result.(map[string]any); ok {
if contentType, ok := result["@type"]; ok {
return contentType.(string), nil
} else {
return "", fmt.Errorf("frontmatter did not contain '@type'")
}
} else {
return "", fmt.Errorf("could not parse frontmatter")
}
}
// ParseFile parses a markdown document into structured data.
//
// Every "@type:" in the document is first rewritten to `"@type":` and a single
// trailing newline is removed. The content type is then determined with
// DetectType, the template of that name is fetched from the registry and
// compiled, the document is fuzzy-matched against it and the matches are
// decoded.
//
// Each failing step is reported as a wrapped error.
func ParseFile(markdownContent string) (any, error) {
	quoted := strings.ReplaceAll(markdownContent, "@type:", `"@type":`)
	markdownContent = strings.TrimSuffix(quoted, "\n")

	contentType, err := DetectType(markdownContent)
	if err != nil {
		return nil, fmt.Errorf("could not detect type: %w", err)
	}

	templateContent, err := registry.GetTemplate(contentType)
	if err != nil {
		return nil, fmt.Errorf("could not get schema: %w", err)
	}

	compiled, err := template.CompileTemplate(templateContent)
	if err != nil {
		return nil, fmt.Errorf("failed to compile template: %w", err)
	}

	matches := matcher.MatchBlocksFuzzy(markdownContent, compiled, 0.3)
	result, err := decoders.Parse(matches)
	if err != nil {
		return nil, fmt.Errorf("failed to parse blocks: %w", err)
	}

	return result, nil
}

View File

@@ -4,10 +4,10 @@ import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"testing"
"git.max-richter.dev/max/marka/parser"
"github.com/google/go-cmp/cmp"
)
func TestParseRecipe_Golden(t *testing.T) {
@@ -34,10 +34,7 @@ func TestParseRecipe_Golden(t *testing.T) {
t.Fatalf("unmarshal expected.json: %v", err)
}
// Deep structural compare
if !reflect.DeepEqual(want, got) {
gb, _ := json.MarshalIndent(got, "", " ")
wb, _ := json.MarshalIndent(want, "", " ")
t.Fatalf("parsed JSON mismatch\n--- got ---\n%s\n--- want ---\n%s", string(gb), string(wb))
if diff := cmp.Diff(want, got); diff != "" {
t.Fatalf("JSON mismatch (-want +got):\n%s", diff)
}
}

View File

@@ -1,19 +1,22 @@
package parser
// Package matcher contains functions for matching template.Block to a string.
package matcher
import (
"math"
"git.max-richter.dev/max/marka/parser/blocks"
"git.max-richter.dev/max/marka/parser/utils"
"git.max-richter.dev/max/marka/template"
"github.com/agext/levenshtein"
)
type MatchBlock struct {
// Block matches a template.Block to a section inside a string
type Block struct {
Start, End int
Block blocks.TemplateBlock
Block template.Block
src *string
}
func (m MatchBlock) GetContent() string {
func (m Block) GetContent() string {
if m.src == nil || m.Start < 0 || m.End > len(*m.src) || m.Start > m.End {
return ""
}
@@ -23,18 +26,18 @@ func (m MatchBlock) GetContent() string {
// MatchBlocksFuzzy finds anchor positions for all MatchingBlock blocks using
// Levenshtein distance (tolerant matching), then returns ONLY the DataBlock
// segments as gaps between those anchors.
func MatchBlocksFuzzy(markdown string, templateBlocks []blocks.TemplateBlock, maxDist float64) []MatchBlock {
var out []MatchBlock
func MatchBlocksFuzzy(markdown string, templateBlocks []template.Block, maxDist float64) []Block {
var out []Block
var lastIndex = 0
lastIndex := 0
for i, b := range templateBlocks {
if b.Type == blocks.MatchingBlock {
if b.Type == template.MatchingBlock {
start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), 0.3)
if end != -1 {
if i > 0 {
previousBlock := templateBlocks[i-1]
if previousBlock.Type == blocks.DataBlock {
out = append(out, MatchBlock{
if previousBlock.Type == template.DataBlock {
out = append(out, Block{
Start: lastIndex,
End: start,
Block: previousBlock,
@@ -48,14 +51,16 @@ func MatchBlocksFuzzy(markdown string, templateBlocks []blocks.TemplateBlock, ma
}
// Handle the last block
lastBlock := templateBlocks[len(templateBlocks)-1]
if lastBlock.Type == blocks.DataBlock {
out = append(out, MatchBlock{
Start: lastIndex,
End: len(markdown),
Block: lastBlock,
src: &markdown,
})
if len(templateBlocks) > 0 {
lastBlock := templateBlocks[len(templateBlocks)-1]
if lastBlock.Type == template.DataBlock {
out = append(out, Block{
Start: lastIndex,
End: len(markdown),
Block: lastBlock,
src: &markdown,
})
}
}
return out
@@ -72,7 +77,7 @@ func FuzzyFind(haystack string, from int, needle string, maxDist float64) (start
sub := haystack[i : i+windowSize]
dist := levenshtein.Distance(sub, needle, nil)
maxLen := max(needleLen, windowSize)
norm := float64(dist)/float64(maxLen) + float64(abs(windowSize-needleLen))*0.01/float64(maxLen)
norm := float64(dist)/float64(maxLen) + float64(utils.Abs(windowSize-needleLen))*0.01/float64(maxLen)
if norm < bestDist {
bestStart, bestEnd, bestDist = i, i+windowSize, norm
@@ -88,17 +93,3 @@ func FuzzyFind(haystack string, from int, needle string, maxDist float64) (start
}
return -1, -1
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

View File

@@ -1,14 +1,17 @@
package parser_test
package matcher_test
import (
"fmt"
"testing"
"git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/parser/utils"
"git.max-richter.dev/max/marka/registry"
"git.max-richter.dev/max/marka/template"
)
func TestFuzzyFindAll(t *testing.T) {
recipeMd := readTestDataFile(t, "baguette.md")
recipeMd := utils.ReadTestDataFile(t, "baguette.md")
tests := []struct {
Needle string
@@ -25,24 +28,32 @@ func TestFuzzyFindAll(t *testing.T) {
}
for _, test := range tests {
start, end := parser.FuzzyFind(recipeMd, test.StartIndex, test.Needle, 0.3) // allow 50% error
start, end := matcher.FuzzyFind(recipeMd, test.StartIndex, test.Needle, 0.3) // allow 50% error
if start != test.Start || end != test.End {
t.Errorf("Start or end do not match: Needle=%q Start=%d/%d End=%d/%d", test.Needle, test.Start, start, test.End, end)
}
}
}
func TestFuzzyBlockMatch(t *testing.T) {
recipeMd := readTestDataFile(t, "baguette.md")
schemaMd, err := registry.GetTemplate("recipe")
recipeMd := utils.ReadTestDataFile(t, "baguette.md")
schemaMd, err := registry.GetTemplate("Recipe")
if err != nil {
t.Errorf("Failed to load template: %s", err.Error())
t.FailNow()
}
blocks, _ := parser.ExtractBlocks(schemaMd)
matches := parser.MatchBlocksFuzzy(recipeMd, blocks, 0.3)
blocks, err := template.CompileTemplate(schemaMd)
if err != nil {
t.Errorf("Failed to compile template: %s", err.Error())
t.FailNow()
}
for _, b := range blocks {
fmt.Printf("block: %#v\n", b)
}
matches := matcher.MatchBlocksFuzzy(recipeMd, blocks, 0.3)
expected := []struct {
value string
@@ -73,5 +84,4 @@ func TestFuzzyBlockMatch(t *testing.T) {
t.Errorf("Match %d did not match expected: %q", i, m.GetContent())
}
}
}

View File

@@ -1,15 +0,0 @@
package parser
// Parse decodes the content of every matched block with the block's own Parse
// method and collects the results in a map keyed by the key that method
// returns.
//
// The signature has no error result, so a block whose decoding fails is
// skipped instead of being recorded under whatever key came back with the
// error.
func Parse(blocks []MatchBlock) map[string]any {
	result := make(map[string]any, len(blocks))
	for _, b := range blocks {
		key, value, err := b.Block.Parse(b.GetContent())
		if err != nil {
			continue
		}
		result[key] = value
	}
	return result
}

View File

@@ -1,42 +0,0 @@
package parser_test
import (
"fmt"
"testing"
"git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/registry"
)
// TestParseBaguette extracts the blocks of the "recipe" template, matches them
// against baguette.md, parses the matches and compares selected keys by their
// printed representation.
func TestParseBaguette(t *testing.T) {
	recipeMd := readTestDataFile(t, "baguette.md")

	schema, err := registry.GetTemplate("recipe")
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	templateBlocks, err := parser.ExtractBlocks(schema)
	if err != nil {
		t.Fatalf("Err: %s", err)
	}

	parsed := parser.Parse(parser.MatchBlocksFuzzy(recipeMd, templateBlocks, 0.3))

	// "recipeInstructions" is intentionally left out of the comparison:
	//   "recipeInstructions": []string{
	//     "Mix Flour Water and Salt",
	//     "Bake the bread",
	//   },
	expected := map[string]any{
		"name":             "Baguette",
		"description":      "My favourite baguette recipe",
		"recipeIngredient": []string{"Flour", "Water", "Salt"},
	}

	for key, want := range expected {
		got := parsed[key]
		if fmt.Sprintf("%v", got) == fmt.Sprintf("%v", want) {
			continue
		}
		t.Errorf("Expected %v but got %v", want, got)
	}
}

View File

@@ -1,8 +1,7 @@
---
@type: Recipe
image: https://example.com/salad.jpg
author: Alex Chef
datePublished: 2025-08-12
author.name: Alex Chef
prepTime: PT10M
cookTime: PT0M
recipeYield: 2 servings

View File

@@ -1,5 +1,5 @@
{
"@context": "https://schema.org/",
"@context": "https://schema.org",
"@type": "Recipe",
"name": "Simple Salad",
"image": "https://example.com/salad.jpg",
@@ -7,7 +7,6 @@
"@type": "Person",
"name": "Alex Chef"
},
"datePublished": "2025-08-12",
"description": "A quick green salad.",
"prepTime": "PT10M",
"cookTime": "PT0M",

8
parser/utils/math.go Normal file
View File

@@ -0,0 +1,8 @@
package utils
// Abs returns the absolute value of x.
func Abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}

View File

@@ -1,4 +1,4 @@
package parser_test
package utils
import (
"os"
@@ -6,8 +6,8 @@ import (
"testing"
)
func readTestDataFile(t *testing.T, fileName string) string {
path := filepath.Join("testdata", fileName)
func ReadTestDataFile(t *testing.T, fileName string) string {
path := filepath.Join("../testdata", fileName)
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("failed to read test data file: %v", err)

View File

@@ -0,0 +1,65 @@
// Package utils contains utility functions for the parser package.
package utils
import (
"maps"
"strings"
)
// SetPathValue sets value at a dot-separated path in obj, creating maps as
// needed, and returns the resulting root. Empty path segments are ignored, so
// "a..b" is treated like "a.b".
//
// If the path contains no keys at all (e.g. "", "." or ".."):
//   - when obj == nil: value is returned directly (e.g. "flour");
//   - when obj is a map[string]any: if value is a map[string]any as well it is
//     merged into obj; otherwise obj is returned unchanged;
//   - otherwise: obj is returned unchanged.
//
// If the path has keys:
//   - when obj is a map[string]any it is modified in place and returned;
//   - when obj is nil or not a map, a fresh map is used as root;
//   - an intermediate entry that is missing or not a map is replaced by a new map.
//
// A nil map[string]any, whether passed as obj or found along the path, is
// treated like a missing map and replaced by a fresh one instead of causing an
// "assignment to entry in nil map" panic.
func SetPathValue(path string, value any, obj any) any {
	// Split and drop empty segments (so ".", ".." and "" yield no keys).
	var keys []string
	for _, k := range strings.Split(path, ".") {
		if k != "" {
			keys = append(keys, k)
		}
	}

	// root stays nil when obj is nil, not a map, or a typed nil map.
	root, isMap := obj.(map[string]any)

	// Root case: the path addresses obj itself.
	if len(keys) == 0 {
		switch {
		case obj == nil:
			return value
		case !isMap:
			return obj
		}
		if mv, ok := value.(map[string]any); ok {
			if root == nil {
				root = make(map[string]any, len(mv))
			}
			maps.Copy(root, mv)
		}
		return root
	}

	if root == nil {
		root = map[string]any{}
	}

	// Descend through (or construct) the intermediate maps, then set the leaf.
	curr := root
	last := len(keys) - 1
	for _, k := range keys[:last] {
		next, ok := curr[k].(map[string]any)
		if !ok || next == nil {
			next = map[string]any{}
			curr[k] = next
		}
		curr = next
	}
	curr[keys[last]] = value

	return root
}

View File

@@ -0,0 +1,132 @@
package utils
import (
"fmt"
"reflect"
"testing"
)
// TestSetPathValue_SingleKey checks that a one-segment path sets a top-level key.
func TestSetPathValue_SingleKey(t *testing.T) {
	want := map[string]any{"name": "Max"}

	got := SetPathValue("name", "Max", map[string]any{})
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_DotSyntax checks that the root path "." merges a map value
// into an existing map.
func TestSetPathValue_DotSyntax(t *testing.T) {
	target := map[string]any{}
	meta := map[string]any{"name": "Max"}
	want := map[string]any{"name": "Max"}

	got := SetPathValue(".", meta, target)
	fmt.Printf("%+v\n", got)
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_DotSyntaxString checks that the root path "." on a nil
// object returns the value itself.
func TestSetPathValue_DotSyntaxString(t *testing.T) {
	var target any
	const want = "flour"

	got := SetPathValue(".", "flour", target)
	fmt.Printf("%+v\n", got)
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_NestedKeys_CreateMissingMaps checks that missing
// intermediate maps are created while descending a multi-segment path.
func TestSetPathValue_NestedKeys_CreateMissingMaps(t *testing.T) {
	// A failure here most likely means nested maps are not being descended into.
	want := map[string]any{
		"user": map[string]any{
			"profile": map[string]any{"name": "Max"},
		},
	}

	got := SetPathValue("user.profile.name", "Max", map[string]any{})
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map for nested keys.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_OverwriteExistingValue checks that an existing key is replaced.
func TestSetPathValue_OverwriteExistingValue(t *testing.T) {
	want := map[string]any{"foo": "new"}

	got := SetPathValue("foo", "new", map[string]any{"foo": "old"})
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map after overwrite.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_PartiallyExistingPath checks that existing intermediate
// maps are reused when only the leaf is missing.
func TestSetPathValue_PartiallyExistingPath(t *testing.T) {
	existing := map[string]any{
		"user": map[string]any{
			"profile": map[string]any{},
		},
	}
	want := map[string]any{
		"user": map[string]any{
			"profile": map[string]any{"age": 28},
		},
	}

	got := SetPathValue("user.profile.age", 28, existing)
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map with partially existing path.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_EmptySegmentsAreIgnored checks that "a..b" is handled like "a.b".
func TestSetPathValue_EmptySegmentsAreIgnored(t *testing.T) {
	// Empty segments are a no-op, so the value must still end up at a.b.
	want := map[string]any{
		"a": map[string]any{"b": 1},
	}

	got := SetPathValue("a..b", 1, map[string]any{})
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map with empty segments.\n got: %#v\nwant: %#v", got, want)
}
// TestSetPathValue_ComplexValueTypes checks that non-scalar values such as
// slices are stored unchanged at the target path.
func TestSetPathValue_ComplexValueTypes(t *testing.T) {
	want := map[string]any{
		"nums": map[string]any{
			"list": []int{1, 2, 3},
		},
	}

	got := SetPathValue("nums.list", []int{1, 2, 3}, map[string]any{})
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("unexpected map with complex value types.\n got: %#v\nwant: %#v", got, want)
}