Files
marka/parser/matcher.go
2025-08-17 00:46:45 +02:00

105 lines
2.4 KiB
Go

package parser
import (
"math"
"git.max-richter.dev/max/marka/parser/blocks"
"github.com/agext/levenshtein"
)
// MatchBlock describes one segment of a source text that has been associated
// with a template block.
type MatchBlock struct {
	// Start is the inclusive byte offset of the segment within the source text.
	Start int
	// End is the exclusive byte offset of the segment within the source text.
	End int
	// Block is the template block this segment is associated with.
	Block blocks.TemplateBlock
	// src points at the text that Start and End index into; GetContent
	// returns "" when it is nil.
	src *string
}
// GetContent returns the slice of the source text covered by [Start, End).
// It returns the empty string when no source is attached or when the offsets
// do not describe a valid range inside the source.
func (m MatchBlock) GetContent() string {
	if m.src == nil {
		return ""
	}

	text := *m.src
	if m.Start < 0 || m.Start > m.End || m.End > len(text) {
		return ""
	}

	return text[m.Start:m.End]
}
// MatchBlocksFuzzy locates every blocks.MatchingBlock of the template inside
// markdown using FuzzyFind (Levenshtein-based, tolerant matching) and returns
// only the blocks.DataBlock segments, i.e. the gaps between those anchors.
//
// The template is walked in order. Each matching block is searched for
// starting at the end of the previously found anchor. When an anchor is found
// and the template block directly before it is a data block, the text between
// the previous anchor and this one is emitted for that data block. Anchors
// that cannot be found are skipped and do not move the search position.
// If the final template block is a data block it receives everything from the
// last anchor to the end of markdown.
//
// maxDist is the tolerance forwarded to FuzzyFind for every anchor search.
// An empty template yields a nil result.
func MatchBlocksFuzzy(markdown string, templateBlocks []blocks.TemplateBlock, maxDist float64) []MatchBlock {
	// Nothing to match against; also protects the last-block lookup below
	// from indexing an empty slice.
	if len(templateBlocks) == 0 {
		return nil
	}

	var out []MatchBlock
	lastIndex := 0

	for i, b := range templateBlocks {
		if b.Type != blocks.MatchingBlock {
			continue
		}

		// Honour the caller-supplied tolerance instead of a fixed constant.
		start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), maxDist)
		if end == -1 {
			// Anchor not found: keep searching from the same position.
			continue
		}

		// The data block immediately preceding this anchor owns the gap
		// between the previous anchor and this one.
		if i > 0 {
			previousBlock := templateBlocks[i-1]
			if previousBlock.Type == blocks.DataBlock {
				out = append(out, MatchBlock{
					Start: lastIndex,
					End:   start,
					Block: previousBlock,
					src:   &markdown,
				})
			}
		}
		lastIndex = end
	}

	// A trailing data block has no closing anchor, so it runs to the end of
	// the input.
	lastBlock := templateBlocks[len(templateBlocks)-1]
	if lastBlock.Type == blocks.DataBlock {
		out = append(out, MatchBlock{
			Start: lastIndex,
			End:   len(markdown),
			Block: lastBlock,
			src:   &markdown,
		})
	}
	return out
}
// FuzzyFind searches haystack, beginning at byte offset from, for the
// substring that most closely resembles needle by Levenshtein distance.
//
// Candidate windows vary in byte length around len(needle): the slack is
// int(len(needle)*maxDist)+1 in either direction, with a minimum window of 1.
// Each window is scored as the edit distance divided by the longer of the
// window and needle lengths, plus a small penalty proportional to their
// length difference so that, among near-equal matches, windows closer to the
// needle's length win. Scanning stops at the first start position after which
// the best score is at most 0.05.
//
// It returns the half-open byte range [start, end) of the best window when
// its score is at most maxDist+0.01, and (-1, -1) otherwise. A negative from
// is treated as 0; a from at or beyond len(haystack) finds nothing.
func FuzzyFind(haystack string, from int, needle string, maxDist float64) (start int, end int) {
	// A negative offset would make the slice expression below panic.
	if from < 0 {
		from = 0
	}

	bestStart, bestEnd, bestDist := -1, -1, math.MaxFloat64
	needleLen := len(needle)

	// Allowed deviation of the window length from the needle length.
	slack := int(float64(needleLen) * maxDist)
	minWindow := max(1, needleLen-slack-1)
	maxWindow := needleLen + slack + 1

	for i := from; i < len(haystack); i++ {
		for windowSize := minWindow; windowSize <= maxWindow && i+windowSize <= len(haystack); windowSize++ {
			sub := haystack[i : i+windowSize]
			dist := levenshtein.Distance(sub, needle, nil)
			maxLen := max(needleLen, windowSize)
			// Normalised distance plus a tie-breaking penalty for length mismatch.
			norm := float64(dist)/float64(maxLen) + float64(abs(windowSize-needleLen))*0.01/float64(maxLen)
			if norm < bestDist {
				bestStart, bestEnd, bestDist = i, i+windowSize, norm
			}
		}
		// Good enough: stop scanning further start positions.
		if bestDist <= 0.05 {
			break
		}
	}

	if bestStart >= 0 && bestDist <= maxDist+0.01 {
		return bestStart, bestEnd
	}
	return -1, -1
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// max returns the larger of a and b.
func max(a, b int) int {
	if a <= b {
		return b
	}
	return a
}