feat: refactor some shit

This commit is contained in:
Max Richter
2025-08-17 00:46:45 +02:00
parent 43644c4f40
commit cc8f967f07
20 changed files with 459 additions and 209 deletions

View File

@@ -1,51 +1,32 @@
package parser package parser
type BlockType string import (
"fmt"
const ( "git.max-richter.dev/max/marka/parser/blocks"
BlockData BlockType = "data" // content between lines "{" and "}"
BlockMatching BlockType = "matching" // everything outside data blocks
) )
type Block struct {
Type BlockType
Start, End int // byte offsets [Start, End)
src *string
}
func (b Block) GetContent() string {
if b.src == nil || b.Start < 0 || b.End > len(*b.src) || b.Start > b.End {
return ""
}
return (*b.src)[b.Start:b.End]
}
// ExtractBlocks scans once, emitting: // ExtractBlocks scans once, emitting:
// - data blocks: inner content between a line that's exactly "{" and a line that's exactly "}" // - data blocks: inner content between a line that's exactly "{" and a line that's exactly "}"
// - matching blocks: gaps between data blocks (excluding the brace lines themselves) // - matching blocks: gaps between data blocks (excluding the brace lines themselves)
func ExtractBlocks(src string) []Block { func ExtractBlocks(template string) ([]blocks.TemplateBlock, error) {
var out []Block var out []blocks.TemplateBlock
var curlyIndex int var curlyIndex int
const CLOSING = '}' const CLOSING = '}'
const OPENING = '{' const OPENING = '{'
if len(src) > 0 && src[0] == OPENING { var start int
var blockType blocks.BlockType
if len(template) > 0 && template[0] == OPENING {
curlyIndex = 1 curlyIndex = 1
out = append(out, Block{ blockType = blocks.DataBlock
Start: 0,
Type: BlockData,
src: &src,
})
} else { } else {
out = append(out, Block{ blockType = blocks.MatchingBlock
Start: 0,
Type: BlockMatching,
src: &src,
})
} }
for i, r := range src { for i, r := range template {
var nextCurlyIndex = curlyIndex var nextCurlyIndex = curlyIndex
@@ -57,41 +38,42 @@ func ExtractBlocks(src string) []Block {
} }
var nextChar rune = ' ' var nextChar rune = ' '
if i+1 < len(src) { if i+1 < len(template) {
nextChar = rune(src[i+1]) nextChar = rune(template[i+1])
} }
if curlyIndex == 0 && nextCurlyIndex == 1 { if curlyIndex == 0 && nextCurlyIndex == 1 {
out[len(out)-1].End = i block, err := blocks.ParseTemplateBlock(template[start:i], blockType)
out = append(out, Block{ if err != nil {
Start: i, return nil, fmt.Errorf("Failed to parse block: %w", err)
Type: BlockData, }
src: &src, out = append(out, block)
}) start = i
blockType = blocks.DataBlock
} else if curlyIndex == 1 && nextCurlyIndex == 0 { } else if curlyIndex == 1 && nextCurlyIndex == 0 {
out[len(out)-1].End = i + 1
block, err := blocks.ParseTemplateBlock(template[start:i+1], blockType)
if err != nil {
return nil, fmt.Errorf("Failed to parse block: %w", err)
}
out = append(out, block)
if nextChar == OPENING { if nextChar == OPENING {
out = append(out, Block{ start = i + 1
Start: i + 1, blockType = blocks.DataBlock
Type: BlockData,
src: &src,
})
} else { } else {
out = append(out, Block{ start = i + 1
Start: i + 1, blockType = blocks.MatchingBlock
Type: BlockMatching,
src: &src,
})
} }
} }
curlyIndex = nextCurlyIndex curlyIndex = nextCurlyIndex
} }
var lastBlock = out[len(out)-1] // var lastBlock = out[len(out)-1]
if lastBlock.End == 0 { // if lastBlock.End == 0 {
out = out[:len(out)-1] // out = out[:len(out)-1]
} // }
return out return out, nil
} }

98
parser/blocks/blocks.go Normal file
View File

@@ -0,0 +1,98 @@
package blocks
import (
"fmt"
"strings"
)
// TemplateType classifies the syntax a data block is written in.
type TemplateType int

const (
	// InvalidTemplate is the zero value: the text matched neither syntax.
	InvalidTemplate TemplateType = iota
	// ShortTemplate is the single-line form, e.g. { name | text,required }.
	ShortTemplate
	// LongTemplate is the multi-line form with "path:" / "codec:" keys.
	LongTemplate
)

// DetectTemplateType reports which syntax tmpl is written in. Leading and
// trailing whitespace is ignored before classification.
//
//   - ShortTemplate: a single line wrapped in "{" and "}", for example
//     { name | text,required }.
//   - LongTemplate: spans several lines and mentions "path:" or "codec:",
//     for example
//     {
//     path: name
//     codec: text
//     required: true
//     }
//   - InvalidTemplate: anything else.
func DetectTemplateType(tmpl string) TemplateType {
	body := strings.TrimSpace(tmpl)
	multiline := strings.Contains(body, "\n")

	switch {
	case !multiline && strings.HasPrefix(body, "{") && strings.HasSuffix(body, "}"):
		return ShortTemplate
	case multiline && (strings.Contains(body, "path:") || strings.Contains(body, "codec:")):
		return LongTemplate
	default:
		return InvalidTemplate
	}
}
// CodecType names the codec used to encode/render a block's value.
type CodecType string

const (
	CodecText   CodecType = "text"
	CodecNumber CodecType = "number"
	CodecYaml   CodecType = "yaml"
	CodecList   CodecType = "list"
)

// parseCodecType converts a codec name into its CodecType. The comparison is
// exact (case-sensitive). For an unrecognised name it returns CodecText
// together with a non-nil error.
func parseCodecType(input string) (CodecType, error) {
	switch codec := CodecType(input); codec {
	case CodecNumber, CodecYaml, CodecList, CodecText:
		return codec, nil
	}
	return CodecText, fmt.Errorf("unknown codec: '%s'", input)
}
// BlockType distinguishes the two kinds of segment a template is split into.
type BlockType string
const (
DataBlock BlockType = "data" // content between lines "{" and "}"
MatchingBlock BlockType = "matching" // everything outside data blocks
)
// TemplateBlock is one segment of a template: either literal text
// (MatchingBlock) or a placeholder describing a value to extract (DataBlock).
type TemplateBlock struct {
// Type tells whether this is a data or a matching block.
Type BlockType
// Path is returned as the result key by Parse for text-codec blocks.
Path string
// Codec selects which decoder Parse dispatches to.
Codec CodecType
// Required is set by the "required" option of a short template.
Required bool
// content is the raw template text of the block; read it via GetContent.
content string
}
// GetContent returns the raw template text stored in the block.
func (b TemplateBlock) GetContent() string {
return b.content
}
// Parse decodes input, the document text matched to this block, according to
// the block's codec.
//
// It returns the key the value belongs under, the decoded value, and an error.
// Text blocks return the input unchanged under b.Path; yaml and list blocks
// are delegated to ParseYamlBlock and ParseListBlock. Any other codec
// (including CodecNumber, which has no decoder yet) yields an error instead of
// silently producing an empty value, so callers can tell "unsupported" apart
// from "empty".
func (b *TemplateBlock) Parse(input string) (key string, value any, err error) {
	switch b.Codec {
	case CodecText:
		return b.Path, input, nil
	case CodecYaml:
		return b.ParseYamlBlock(input)
	case CodecList:
		return b.ParseListBlock(input)
	default:
		return b.Path, nil, fmt.Errorf("unsupported codec %q for path %q", b.Codec, b.Path)
	}
}

View File

@@ -0,0 +1,10 @@
package blocks
import "fmt"
// ParseListBlock is the decoder for list-codec blocks. It is still a stub: it
// only prints the input and returns a nil value.
//
// The key is b.Path (matching what Parse returns for text blocks) so that
// several list blocks do not collide under the empty key in the caller's
// result map. The error result is named err rather than error to avoid
// shadowing the builtin type.
//
// TODO: split input into items using the block's item template.
func (b TemplateBlock) ParseListBlock(input string) (key string, value any, err error) {
	fmt.Printf("Parsing List: '%q'", input)
	return b.Path, nil, nil
}

118
parser/blocks/template.go Normal file
View File

@@ -0,0 +1,118 @@
package blocks
import (
"fmt"
"strings"
"go.yaml.in/yaml/v4"
)
// cleanTemplate trims surrounding whitespace from input and then removes at
// most one leading "{" and one trailing "}". Whitespace directly inside the
// braces is kept.
func cleanTemplate(input string) string {
	trimmed := strings.TrimSpace(input)
	return strings.TrimSuffix(strings.TrimPrefix(trimmed, "{"), "}")
}
// parseShortTemplate parses the single-line form "{ path | opt1,opt2 }".
//
// The text before the first "|" is the path; the text between the first and
// second "|" is a comma-separated option list. Recognised options are
// "required" (sets Required) and "number" (sets the codec to CodecNumber);
// everything else is ignored and the codec defaults to CodecText.
//
// An empty path is rejected. The previous guard, len(split) < 1, could never
// trigger because strings.Split with a non-empty separator always returns at
// least one element.
func parseShortTemplate(input string) (TemplateBlock, error) {
	parts := strings.Split(cleanTemplate(input), "|")

	path := strings.TrimSpace(parts[0])
	if path == "" {
		return TemplateBlock{}, fmt.Errorf("Invalid Short Template")
	}

	block := TemplateBlock{
		Type:    DataBlock,
		Path:    path,
		Codec:   CodecText,
		content: input,
	}

	if len(parts) > 1 {
		for _, option := range strings.Split(parts[1], ",") {
			switch strings.TrimSpace(option) {
			case "required":
				block.Required = true
			case "number":
				block.Codec = CodecNumber
			}
		}
	}

	return block, nil
}
// yamlBlock is the decoding target for a long-form (multi-line YAML) data
// block. parseYamlTemplate decodes into it with KnownFields(true), so keys not
// listed here make decoding fail. Fields, Item and Template are decoded but
// not yet used by parseYamlTemplate.
type yamlBlock struct {
Path string `yaml:"path"`
Codec string `yaml:"codec"`
Required bool `yaml:"required,omitempty"`
Fields []yamlField `yaml:"fields"`
// Item holds the per-item template of a block (presumably for list codecs — TODO confirm).
Item *struct {
Template string `yaml:"template,omitempty"`
} `yaml:"item,omitempty"`
Template string `yaml:"template,omitempty"`
}
// yamlField is one entry of a yamlBlock's "fields" list.
type yamlField struct {
Path string `yaml:"path"`
// Value is an optional fixed value (presumably paired with a constant-style codec — TODO confirm).
Value any `yaml:"value,omitempty"`
Codec string `yaml:"codec"`
Required bool `yaml:"required"`
}
// parseYamlTemplate parses the long, multi-line form of a data block, e.g.
//
//	{
//	  path: name
//	  codec: text
//	  required: true
//	}
//
// The outer braces are stripped and the remainder is decoded as YAML with
// unknown keys rejected. A top-level "path" is mandatory and "codec" must name
// a known codec. The "required" flag is carried over to the returned block so
// long templates behave like short ones.
//
// On failure the zero TemplateBlock is returned with an error that wraps the
// underlying cause; nothing is printed, leaving reporting to the caller.
func parseYamlTemplate(input string) (block TemplateBlock, err error) {
	cleaned := cleanTemplate(input)

	var blk yamlBlock
	dec := yaml.NewDecoder(strings.NewReader(cleaned))
	dec.KnownFields(true) // reject keys that yamlBlock does not declare
	if err := dec.Decode(&blk); err != nil {
		return block, fmt.Errorf("failed to decode yaml template: %w", err)
	}

	if blk.Path == "" {
		return block, fmt.Errorf("missing top-level 'path'")
	}

	codec, err := parseCodecType(blk.Codec)
	if err != nil {
		return block, fmt.Errorf("failed to parse codec: %w", err)
	}

	return TemplateBlock{
		Type:     DataBlock,
		Path:     blk.Path,
		Codec:    codec,
		Required: blk.Required,
		content:  input,
	}, nil
}
// ParseTemplateBlock turns one raw template segment into a TemplateBlock.
//
// Matching blocks are wrapped as-is. Any other block type is treated as a data
// block: its syntax is detected and the segment is handed to the short or the
// YAML parser. When the syntax is not recognised, a data block carrying only
// the raw content is returned together with an error.
func ParseTemplateBlock(template string, blockType BlockType) (block TemplateBlock, err error) {
	if blockType == MatchingBlock {
		block = TemplateBlock{Type: MatchingBlock, content: template}
		return block, nil
	}

	switch DetectTemplateType(template) {
	case ShortTemplate:
		return parseShortTemplate(template)
	case InvalidTemplate:
		return TemplateBlock{Type: DataBlock, content: template}, fmt.Errorf("Invalid Template")
	default:
		return parseYamlTemplate(template)
	}
}

View File

@@ -0,0 +1,18 @@
package blocks
import (
"fmt"
"go.yaml.in/yaml/v4"
)
// ParseYamlBlock decodes input as a YAML mapping and returns it under the
// block's path.
//
// Previously the decoded map was discarded and ("", nil, nil) was returned, so
// a successful parse produced no data. The error result is named err rather
// than error to avoid shadowing the builtin type.
func (b TemplateBlock) ParseYamlBlock(input string) (key string, value any, err error) {
	res := make(map[string]any)
	if err := yaml.Unmarshal([]byte(input), &res); err != nil {
		return "", nil, fmt.Errorf("failed to parse yaml: %w", err)
	}
	return b.Path, res, nil
}

View File

@@ -1,50 +1,54 @@
package parser package parser_test
import ( import (
"os"
"path/filepath"
"strings"
"testing" "testing"
"git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/parser/blocks"
"git.max-richter.dev/max/marka/registry"
) )
func readFile(t *testing.T, fileName string) string {
path := filepath.Join("testdata", fileName)
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("failed to read test data file: %v", err)
}
return string(data)
}
func TestExtractBlocks(t *testing.T) { func TestExtractBlocks(t *testing.T) {
src := readFile(t, "recipe.schema.md") src, err := registry.GetTemplate("recipe")
blocks := ExtractBlocks(src) if err != nil {
t.Errorf("Failed to extract blocks: %s", err.Error())
t.FailNow()
}
templateBlocks, err := parser.ExtractBlocks(src)
if err != nil {
t.Errorf("Failed to extract blocks: %s", err.Error())
t.FailNow()
}
expected := []struct { expected := []struct {
Type BlockType Type blocks.BlockType
Content string Content string
}{ }{
{BlockMatching, "---\\n"}, {blocks.MatchingBlock, "---\n"},
{BlockData, "{ . }"}, {blocks.DataBlock, "{\n path: .\n codec: yaml\n fields:\n - path: name\n codec: text\n required: true\n - path: image\n codec: text\n required: true\n - path: author.@type\n codec: const\n value: Person\n - path: author.name\n codec: text\n - path: datePublished\n codec: text\n - path: description\n codec: text\n - path: prepTime\n codec: text\n - path: cookTime\n codec: text\n - path: recipeYield\n codec: text\n}"},
{BlockMatching, "\\n---\\n\\n# "}, {blocks.MatchingBlock, "\n---\n\n# "},
{BlockData, "{ name | text,required }"}, {blocks.DataBlock, "{ name | text,required }"},
{BlockMatching, "\\n\\n"}, {blocks.MatchingBlock, "\n\n"},
{BlockData, "{ description | text,optional }"}, {blocks.DataBlock, "{ description | text }"},
{BlockMatching, "\\n\\n## Ingredients\\n"}, {blocks.MatchingBlock, "\n\n## Ingredients\n"},
{BlockData, "{\\n path: recipeIngredient\\n codec: list\\n required: true\\n item:\\n template: \"- { . }\"\\n}"}, {blocks.DataBlock, "{\n path: recipeIngredient\n codec: list\n required: true\n item:\n template: \"- { . }\"\n}"},
{BlockMatching, "\\n\\n## Steps\\n"}, {blocks.MatchingBlock, "\n\n## Steps\n"},
{BlockData, "{\\n path: recipeInstructions\\n codec: list\\n required: true\\n item:\\n template: \"{ @index }. { . }\"\\n}"}, {blocks.DataBlock, "{\n path: recipeInstructions\n codec: list\n required: true\n item:\n template: \"{ @index }. { . }\"\n}"},
} }
if len(blocks) != len(expected) { if len(templateBlocks) != len(expected) {
t.Fatalf("expected %d blocks, got %d", len(expected), len(blocks)) t.Fatalf("expected %d blocks, got %d", len(expected), len(templateBlocks))
} }
for i, b := range blocks { for i, b := range templateBlocks {
exp := expected[i] exp := expected[i]
content := strings.ReplaceAll(b.GetContent(), "\n", "\\n") if b.Type != exp.Type {
if b.Type != exp.Type || content != exp.Content { t.Errorf("Block#%d Type '%s' did not match expected type '%s'", i, b.Type, exp.Type)
t.Errorf("Block %d: expected %v, got Type: %v, Start: %d, End: %d, Content: %s", i, exp, b.Type, b.Start, b.End, content) }
content := b.GetContent()
if content != exp.Content {
t.Errorf("Block#%d Content '%s' did not match expected Content: '%s'", i, content, exp.Content)
} }
} }

View File

@@ -3,3 +3,5 @@ module git.max-richter.dev/max/marka/parser
go 1.24.3 go 1.24.3
require github.com/agext/levenshtein v1.2.3 require github.com/agext/levenshtein v1.2.3
require go.yaml.in/yaml/v4 v4.0.0-rc.1 // indirect

View File

@@ -1,2 +1,4 @@
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo= github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558= github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
go.yaml.in/yaml/v4 v4.0.0-rc.1 h1:4J1+yLKUIPGexM/Si+9d3pij4hdc7aGO04NhrElqXbY=
go.yaml.in/yaml/v4 v4.0.0-rc.1/go.mod h1:CBdeces52/nUXndfQ5OY8GEQuNR9uEEOJPZj/Xq5IzU=

17
parser/main.go Normal file
View File

@@ -0,0 +1,17 @@
// Package parser provides functions for parsing Markdown templates into
// structured JSON objects that conform to a JSON Schema.
package parser
// ParseFile is a placeholder: it ignores markdownContent and always returns an
// empty, non-nil map with a nil error.
//
// Planned approach: look up the schema/template (e.g. via
// registry.GetTemplate("Recipe")), split it into blocks — "matching" blocks
// that are plain strings and "data" blocks that capture content — and then
// soft-match the matching blocks against the document using a Levenshtein
// distance.
func ParseFile(markdownContent string) (map[string]any, error) {
	parsed := make(map[string]any)
	return parsed, nil
}

43
parser/main_test.go Normal file
View File

@@ -0,0 +1,43 @@
package parser_test
import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"testing"
"git.max-richter.dev/max/marka/parser"
)
// TestParseRecipe_Golden parses testdata/recipe_salad/input.md and compares
// the result structurally with the JSON stored in
// testdata/recipe_salad/output.json.
func TestParseRecipe_Golden(t *testing.T) {
	td := filepath.Join("testdata", "recipe_salad")
	input := filepath.Join(td, "input.md")
	output := filepath.Join(td, "output.json")

	inputContent, err := os.ReadFile(input)
	if err != nil {
		t.Fatalf("read input.md: %v", err)
	}

	got, err := parser.ParseFile(string(inputContent))
	if err != nil {
		t.Fatalf("ParseFile: %v", err)
	}

	var want map[string]any
	b, err := os.ReadFile(output)
	if err != nil {
		// Name the file that is actually read so a failure points at the right fixture.
		t.Fatalf("read output.json: %v", err)
	}
	if err := json.Unmarshal(b, &want); err != nil {
		t.Fatalf("unmarshal output.json: %v", err)
	}

	// Deep structural compare
	if !reflect.DeepEqual(want, got) {
		// Marshal errors are ignored on purpose: the output is only a
		// best-effort pretty print for the failure message.
		gb, _ := json.MarshalIndent(got, "", " ")
		wb, _ := json.MarshalIndent(want, "", " ")
		t.Fatalf("parsed JSON mismatch\n--- got ---\n%s\n--- want ---\n%s", string(gb), string(wb))
	}
}

View File

@@ -3,12 +3,13 @@ package parser
import ( import (
"math" "math"
"git.max-richter.dev/max/marka/parser/blocks"
"github.com/agext/levenshtein" "github.com/agext/levenshtein"
) )
type MatchBlock struct { type MatchBlock struct {
Start, End int Start, End int
Block Block Block blocks.TemplateBlock
src *string src *string
} }
@@ -22,17 +23,17 @@ func (m MatchBlock) GetContent() string {
// MatchBlocksFuzzy finds anchor positions for all BlockMatching blocks using // MatchBlocksFuzzy finds anchor positions for all BlockMatching blocks using
// Levenshtein distance (tolerant matching), then returns ONLY the BlockData // Levenshtein distance (tolerant matching), then returns ONLY the BlockData
// segments as gaps between those anchors. // segments as gaps between those anchors.
func MatchBlocksFuzzy(markdown string, blocks []Block, maxDist float64) []MatchBlock { func MatchBlocksFuzzy(markdown string, templateBlocks []blocks.TemplateBlock, maxDist float64) []MatchBlock {
var out []MatchBlock var out []MatchBlock
var lastIndex = 0 var lastIndex = 0
for i, b := range blocks { for i, b := range templateBlocks {
if b.Type == BlockMatching { if b.Type == blocks.MatchingBlock {
start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), 0.3) start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), 0.3)
if end != -1 { if end != -1 {
if i > 0 { if i > 0 {
previousBlock := blocks[i-1] previousBlock := templateBlocks[i-1]
if previousBlock.Type == BlockData { if previousBlock.Type == blocks.DataBlock {
out = append(out, MatchBlock{ out = append(out, MatchBlock{
Start: lastIndex, Start: lastIndex,
End: start, End: start,
@@ -47,8 +48,8 @@ func MatchBlocksFuzzy(markdown string, blocks []Block, maxDist float64) []MatchB
} }
// Handle the last block // Handle the last block
lastBlock := blocks[len(blocks)-1] lastBlock := templateBlocks[len(templateBlocks)-1]
if lastBlock.Type == BlockData { if lastBlock.Type == blocks.DataBlock {
out = append(out, MatchBlock{ out = append(out, MatchBlock{
Start: lastIndex, Start: lastIndex,
End: len(markdown), End: len(markdown),

View File

@@ -1,37 +1,27 @@
package parser_test package parser_test
import ( import (
"os"
"path/filepath"
"testing" "testing"
"git.max-richter.dev/max/marka/parser" "git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/registry"
) )
func readFile(t *testing.T, fileName string) string {
path := filepath.Join("testdata", fileName)
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("failed to read test data file: %v", err)
}
return string(data)
}
func TestFuzzyFindAll(t *testing.T) { func TestFuzzyFindAll(t *testing.T) {
recipeMd := readFile(t, "baguette.md") recipeMd := readTestDataFile(t, "baguette.md")
tests := []struct { tests := []struct {
Needle string Needle string
Start, End, StartIndex int Start, End, StartIndex int
}{ }{
{StartIndex: 0, Needle: "# Ingredients\n", Start: 72, End: 86}, {StartIndex: 0, Needle: "# Ingredients\n", Start: 77, End: 91},
{StartIndex: 0, Needle: "# Ingrdients\n", Start: 72, End: 86}, {StartIndex: 0, Needle: "# Ingrdients\n", Start: 77, End: 91},
{StartIndex: 0, Needle: "# Inrdients\n", Start: 72, End: 86}, {StartIndex: 0, Needle: "# Inrdients\n", Start: 77, End: 91},
{StartIndex: 0, Needle: "---\n", Start: 0, End: 4}, {StartIndex: 0, Needle: "---\n", Start: 0, End: 4},
{StartIndex: 4, Needle: "---\n", Start: 24, End: 28}, {StartIndex: 4, Needle: "---\n", Start: 29, End: 33},
{StartIndex: 0, Needle: "# Steps\n", Start: 111, End: 119}, {StartIndex: 0, Needle: "# Steps\n", Start: 116, End: 124},
{StartIndex: 0, Needle: "# Stps\n", Start: 111, End: 119}, {StartIndex: 0, Needle: "# Stps\n", Start: 116, End: 124},
{StartIndex: 0, Needle: "# Step\n", Start: 111, End: 119}, {StartIndex: 0, Needle: "# Step\n", Start: 116, End: 124},
} }
for _, test := range tests { for _, test := range tests {
@@ -45,16 +35,20 @@ func TestFuzzyFindAll(t *testing.T) {
} }
func TestFuzzyBlockMatch(t *testing.T) { func TestFuzzyBlockMatch(t *testing.T) {
recipeMd := readFile(t, "baguette.md") recipeMd := readTestDataFile(t, "baguette.md")
schemaMd := readFile(t, "recipe.schema.md") schemaMd, err := registry.GetTemplate("recipe")
blocks := parser.ExtractBlocks(schemaMd) if err != nil {
t.Errorf("Failed to load template: %s", err.Error())
t.FailNow()
}
blocks, _ := parser.ExtractBlocks(schemaMd)
matches := parser.MatchBlocksFuzzy(recipeMd, blocks, 0.3) matches := parser.MatchBlocksFuzzy(recipeMd, blocks, 0.3)
expected := []struct { expected := []struct {
value string value string
}{ }{
{ {
value: "author: Max Richter", value: "author.name: Max Richter",
}, },
{ {
value: "Baguette", value: "Baguette",
@@ -66,7 +60,7 @@ func TestFuzzyBlockMatch(t *testing.T) {
value: "- Flour\n- Water\n- Salt", value: "- Flour\n- Water\n- Salt",
}, },
{ {
value: "1. Mix Flour Water and Salt\n2. Bake the bread", value: "1. Mix Flour Water and Salt\n2. Bake the bread\n",
}, },
} }

View File

@@ -1,17 +1,15 @@
// Package parser provides functions for parsing Markdown templates into
// structured JSON objects that conform to a JSON Schema.
package parser package parser
func ParseFile(markdownContent string) (map[string]any, error) { func Parse(blocks []MatchBlock) map[string]any {
// _schema, err := registry.GetTemplate("Recipe") result := make(map[string]any)
// if err != nil {
// return nil, fmt.Errorf("could not get schema: %w", err)
// }
// Idea is to split the template into blocks, either "matching" blocks which are simple strings. for _, b := range blocks {
// Or "data" blocks which match the content. Then i want to soft match the "matching" blocks and "data" blocks to the template. input := b.GetContent()
// The "matching" blocks should soft match with a levenshtein distance
return map[string]any{}, nil key, value, _ := b.Block.Parse(input)
result[key] = value
}
return result
} }

View File

@@ -1,43 +1,42 @@
package parser_test package parser_test
import ( import (
"encoding/json" "fmt"
"os"
"path/filepath"
"reflect"
"testing" "testing"
"git.max-richter.dev/max/marka/parser" "git.max-richter.dev/max/marka/parser"
"git.max-richter.dev/max/marka/registry"
) )
func TestParseRecipe_Golden(t *testing.T) { func TestParseBaguette(t *testing.T) {
td := filepath.Join("testdata", "recipe_salad") recipeMd := readTestDataFile(t, "baguette.md")
input := filepath.Join(td, "input.md")
output := filepath.Join(td, "output.json")
inputContent, err := os.ReadFile(input) template, err := registry.GetTemplate("recipe")
if err != nil { if err != nil {
t.Fatalf("read input.md: %v", err) t.Fatalf("Err: %s", err)
} }
got, err := parser.ParseFile(string(inputContent)) blocks, err := parser.ExtractBlocks(template)
if err != nil { if err != nil {
t.Fatalf("ParseFile: %v", err) t.Fatalf("Err: %s", err)
} }
var want map[string]any matches := parser.MatchBlocksFuzzy(recipeMd, blocks, 0.3)
b, err := os.ReadFile(output) parsed := parser.Parse(matches)
if err != nil { expected := map[string]any{
t.Fatalf("read expected.json: %v", err) "name": "Baguette",
} "description": "My favourite baguette recipe",
if err := json.Unmarshal(b, &want); err != nil { "recipeIngredient": []string{"Flour", "Water", "Salt"},
t.Fatalf("unmarshal expected.json: %v", err) // "recipeInstructions": []string{
// "Mix Flour Water and Salt",
// "Bake the bread",
// },
} }
// Deep structural compare for k, v := range expected {
if !reflect.DeepEqual(want, got) { if fmt.Sprintf("%v", parsed[k]) != fmt.Sprintf("%v", v) {
gb, _ := json.MarshalIndent(got, "", " ") t.Errorf("Expected %v but got %v", v, parsed[k])
wb, _ := json.MarshalIndent(want, "", " ")
t.Fatalf("parsed JSON mismatch\n--- got ---\n%s\n--- want ---\n%s", string(gb), string(wb))
} }
}
} }

View File

@@ -1,5 +1,5 @@
--- ---
author: Max Richter author.name: Max Richter
--- ---
# Baguette # Baguette

View File

@@ -1,25 +0,0 @@
---
{ . }
---
# { name | text,required }
{ description | text,optional }
## Ingredients
{
path: recipeIngredient
codec: list
required: true
item:
template: "- { . }"
}
## Steps
{
path: recipeInstructions
codec: list
required: true
item:
template: "{ @index }. { . }"
}

16
parser/testutils_test.go Normal file
View File

@@ -0,0 +1,16 @@
package parser_test
import (
"os"
"path/filepath"
"testing"
)
// readTestDataFile returns the contents of testdata/<fileName> as a string.
// It fails the test immediately if the file cannot be read.
func readTestDataFile(t *testing.T, fileName string) string {
	// Mark this function as a helper so failures are reported at the calling
	// test's line rather than inside this function.
	t.Helper()

	path := filepath.Join("testdata", fileName)
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("failed to read test data file: %v", err)
	}
	return string(data)
}

View File

@@ -1,6 +0,0 @@
description: "Core capture aliases for Marka"
patterns:
text: ".+"
word: "\\S+"
num: "(?:\\d+(?:[.,]\\d+)?(?:\\s?\\d+/\\d+)?)" # 3 | 1.5 | 1 1/2
indexMarker: "\\d+[.)]" # 1. / 1)

View File

@@ -28,9 +28,6 @@ var templates embed.FS
//go:embed schema-org/* //go:embed schema-org/*
var schemas embed.FS var schemas embed.FS
//go:embed aliases/*
var aliases embed.FS
func GetTemplates() Source { func GetTemplates() Source {
return src{fsys: templates} return src{fsys: templates}
} }
@@ -49,11 +46,3 @@ func GetTemplate(name string) (string, error) {
return string(templateBytes), nil return string(templateBytes), nil
} }
func GetSchemas() Source {
return src{fsys: schemas}
}
func GetAliases() Source {
return src{fsys: aliases}
}

View File

@@ -2,10 +2,6 @@
{ {
path: . path: .
codec: yaml codec: yaml
required: true
assert:
"@context": https://schema.org/
"@type": Recipe
fields: fields:
- path: name - path: name
codec: text codec: text
@@ -18,28 +14,22 @@
value: Person value: Person
- path: author.name - path: author.name
codec: text codec: text
required: true
- path: datePublished - path: datePublished
codec: text codec: text
optional: true
- path: description - path: description
codec: text codec: text
optional: true
- path: prepTime - path: prepTime
codec: text codec: text
optional: true
- path: cookTime - path: cookTime
codec: text codec: text
optional: true
- path: recipeYield - path: recipeYield
codec: text codec: text
optional: true
} }
--- ---
# { name | text,required } # { name | text,required }
{ description | text,optional } { description | text }
## Ingredients ## Ingredients
{ {