Files
marka/parser/parser.go
2025-09-28 14:40:29 +02:00

129 lines
3.6 KiB
Go

// Package parser provides functions for parsing Markdown templates into
// structured JSON objects that conform to a JSON Schema.
package parser
import (
"fmt"
"strings"
"time"
"git.max-richter.dev/max/marka/parser/decoders"
"git.max-richter.dev/max/marka/parser/matcher"
"git.max-richter.dev/max/marka/registry"
"git.max-richter.dev/max/marka/template"
)
// DetectType determines the content type of a Markdown document.
//
// It loads the "_default" template from the registry, compiles it,
// fuzzy-matches markdownContent against it, decodes the matched blocks and
// returns the string stored under the "_type" key of the decoded result.
//
// An error is returned when the template cannot be loaded or compiled, when
// no block matches, when decoding fails, or when the decoded result is not a
// map holding a string "_type" entry.
func DetectType(markdownContent string) (string, error) {
	defaultSchemaContent, err := registry.GetTemplate("_default")
	if err != nil {
		return "", fmt.Errorf("could not get schema -> %w", err)
	}

	defaultSchema, err := template.CompileTemplate(defaultSchemaContent)
	if err != nil {
		return "", fmt.Errorf("failed to compile template -> %w", err)
	}

	blocks := matcher.MatchBlocksFuzzy(markdownContent, defaultSchema, 0.3)
	// Report an empty match explicitly instead of relying on later stages;
	// the previous implementation indexed blocks[0] here and panicked.
	if len(blocks) == 0 {
		return "", fmt.Errorf("no blocks matched the default template")
	}

	result, err := decoders.Parse(blocks)
	if err != nil {
		return "", fmt.Errorf("failed to parse blocks -> %w", err)
	}

	fields, ok := result.(map[string]any)
	if !ok {
		return "", fmt.Errorf("could not parse frontmatter")
	}

	rawType, ok := fields["_type"]
	if !ok {
		return "", fmt.Errorf("frontmatter did not contain '_type'")
	}

	// Checked assertion: a non-string "_type" must surface as an error to the
	// caller rather than crash the process.
	contentType, ok := rawType.(string)
	if !ok {
		return "", fmt.Errorf("frontmatter '_type' must be a string, got %T", rawType)
	}

	return contentType, nil
}
// MatchBlocks compiles templateContent and fuzzy-matches markdownContent
// against the compiled template, returning the resulting blocks.
//
// A single trailing newline is stripped from markdownContent before matching.
// An error is returned only when the template fails to compile.
func MatchBlocks(markdownContent, templateContent string) ([]matcher.Block, error) {
	compiled, err := template.CompileTemplate(templateContent)
	if err != nil {
		return nil, fmt.Errorf("failed to compile template -> %w", err)
	}

	trimmed := strings.TrimSuffix(markdownContent, "\n")
	blocks := matcher.MatchBlocksFuzzy(trimmed, compiled, 0.3)
	return blocks, nil
}
// ParseFile parses a Markdown document whose template is chosen automatically.
//
// The content type is detected via DetectType, the matching template is
// fetched from the registry and compiled, and the document is fuzzy-matched
// against it and decoded. A single trailing newline is stripped from
// markdownContent first.
//
// On success the returned value is a map[string]any with two entries:
// "data" (the decoded result) and "timings" (elapsed milliseconds per stage,
// keyed "detect_type", "get_template", "template_compilation" and
// "markdown_parsing").
func ParseFile(markdownContent string) (any, error) {
	elapsed := map[string]int64{}

	// Stage 1: detect the content type (the trim is part of the timed region).
	stageStart := time.Now()
	markdownContent = strings.TrimSuffix(markdownContent, "\n")
	contentType, err := DetectType(markdownContent)
	if err != nil {
		return nil, fmt.Errorf("could not detect type -> %w", err)
	}
	elapsed["detect_type"] = time.Since(stageStart).Milliseconds()

	// Stage 2: look up the template registered for that type.
	stageStart = time.Now()
	templateContent, err := registry.GetTemplate(contentType)
	if err != nil {
		return nil, fmt.Errorf("could not get schema -> %w", err)
	}
	elapsed["get_template"] = time.Since(stageStart).Milliseconds()

	// Stage 3: compile the template.
	stageStart = time.Now()
	compiled, err := template.CompileTemplate(templateContent)
	if err != nil {
		return nil, fmt.Errorf("failed to compile template -> %w", err)
	}
	elapsed["template_compilation"] = time.Since(stageStart).Milliseconds()

	// Stage 4: match the document against the template and decode it.
	stageStart = time.Now()
	matched := matcher.MatchBlocksFuzzy(markdownContent, compiled, 0.3)
	data, err := decoders.Parse(matched)
	if err != nil {
		return nil, fmt.Errorf("failed to parse blocks -> %w", err)
	}
	elapsed["markdown_parsing"] = time.Since(stageStart).Milliseconds()

	return map[string]any{
		"data":    data,
		"timings": elapsed,
	}, nil
}
// ParseFileWithTemplate parses a Markdown document against an explicitly
// supplied template.
//
// templateContent is compiled, markdownContent (minus a single trailing
// newline) is fuzzy-matched against it, and the matched blocks are decoded.
//
// On success the returned value is a map[string]any with two entries:
// "data" (the decoded result) and "timings" (elapsed milliseconds per stage,
// keyed "template_compilation" and "markdown_parsing").
func ParseFileWithTemplate(markdownContent string, templateContent string) (any, error) {
	elapsed := map[string]int64{}

	// Stage 1: compile the template (the trim is part of the timed region).
	stageStart := time.Now()
	markdownContent = strings.TrimSuffix(markdownContent, "\n")
	compiled, err := template.CompileTemplate(templateContent)
	if err != nil {
		return nil, fmt.Errorf("failed to compile template -> %w", err)
	}
	elapsed["template_compilation"] = time.Since(stageStart).Milliseconds()

	// Stage 2: match the document against the template and decode it.
	stageStart = time.Now()
	matched := matcher.MatchBlocksFuzzy(markdownContent, compiled, 0.3)
	data, err := decoders.Parse(matched)
	if err != nil {
		return nil, fmt.Errorf("failed to parse blocks -> %w", err)
	}
	elapsed["markdown_parsing"] = time.Since(stageStart).Milliseconds()

	return map[string]any{
		"data":    data,
		"timings": elapsed,
	}, nil
}