125 lines
3.5 KiB
Go
125 lines
3.5 KiB
Go
// Package parser provides functions for parsing Markdown templates into
// structured JSON objects that conform to a JSON Schema.
|
|
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.max-richter.dev/max/marka/parser/decoders"
|
|
"git.max-richter.dev/max/marka/parser/matcher"
|
|
"git.max-richter.dev/max/marka/registry"
|
|
"git.max-richter.dev/max/marka/template"
|
|
)
|
|
|
|
func DetectType(markdownContent string) (string, error) {
|
|
defaultSchemaContent, err := registry.GetTemplate("_default")
|
|
if err != nil {
|
|
return "", fmt.Errorf("could not get schema -> %w", err)
|
|
}
|
|
|
|
defaultSchema, err := template.CompileTemplate(defaultSchemaContent)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to compile template -> %w", err)
|
|
}
|
|
|
|
blocks := matcher.MatchBlocksFuzzy(markdownContent, defaultSchema, 0.3)
|
|
|
|
result, err := decoders.Parse(blocks)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to parse blocks -> %w", err)
|
|
}
|
|
|
|
if result, ok := result.(map[string]any); ok {
|
|
if contentType, ok := result["_type"]; ok {
|
|
return contentType.(string), nil
|
|
}
|
|
return "", fmt.Errorf("frontmatter did not contain '_type'")
|
|
}
|
|
return "", fmt.Errorf("could not parse frontmatter")
|
|
}
|
|
|
|
func MatchBlocks(markdownContent, templateContent string) ([]matcher.Block, error) {
|
|
markdownContent = strings.TrimSuffix(markdownContent, "\n")
|
|
|
|
tpl, err := template.CompileTemplate(templateContent)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to compile template -> %w", err)
|
|
}
|
|
|
|
return matcher.MatchBlocksFuzzy(markdownContent, tpl, 0.3), nil
|
|
}
|
|
|
|
func ParseFile(markdownContent string) (any, error) {
|
|
timings := make(map[string]int64)
|
|
|
|
startDetectType := time.Now()
|
|
markdownContent = strings.TrimSuffix(markdownContent, "\n")
|
|
|
|
contentType, err := DetectType(markdownContent)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not detect type -> %w", err)
|
|
}
|
|
timings["detect_type"] = time.Since(startDetectType).Milliseconds()
|
|
|
|
startGetTemplate := time.Now()
|
|
templateContent, err := registry.GetTemplate(contentType)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not get schema -> %w", err)
|
|
}
|
|
timings["get_template"] = time.Since(startGetTemplate).Milliseconds()
|
|
|
|
startTemplate := time.Now()
|
|
tpl, err := template.CompileTemplate(templateContent)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to compile template -> %w", err)
|
|
}
|
|
timings["template_compilation"] = time.Since(startTemplate).Milliseconds()
|
|
|
|
startMarkdown := time.Now()
|
|
blocks := matcher.MatchBlocksFuzzy(markdownContent, tpl, 0.3)
|
|
|
|
result, err := decoders.Parse(blocks)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse blocks -> %w", err)
|
|
}
|
|
timings["markdown_parsing"] = time.Since(startMarkdown).Milliseconds()
|
|
|
|
response := map[string]any{
|
|
"data": result,
|
|
"timings": timings,
|
|
}
|
|
|
|
return response, nil
|
|
}
|
|
|
|
func ParseFileWithTemplate(markdownContent string, templateContent string) (any, error) {
|
|
timings := make(map[string]int64)
|
|
|
|
startTemplate := time.Now()
|
|
markdownContent = strings.TrimSuffix(markdownContent, "\n")
|
|
|
|
tpl, err := template.CompileTemplate(templateContent)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to compile template -> %w", err)
|
|
}
|
|
timings["template_compilation"] = time.Since(startTemplate).Milliseconds()
|
|
|
|
startMarkdown := time.Now()
|
|
blocks := matcher.MatchBlocksFuzzy(markdownContent, tpl, 0.3)
|
|
|
|
result, err := decoders.Parse(blocks)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse blocks -> %w", err)
|
|
}
|
|
timings["markdown_parsing"] = time.Since(startMarkdown).Milliseconds()
|
|
|
|
response := map[string]any{
|
|
"data": result,
|
|
"timings": timings,
|
|
}
|
|
|
|
return response, nil
|
|
}
|