// Package matcher contains functions for matching template.Block to a string. package matcher import ( "math" "git.max-richter.dev/max/marka/parser/utils" "git.max-richter.dev/max/marka/template" "github.com/agext/levenshtein" ) // Block matches a template.Block to a section inside a string type Block struct { Start, End int Block template.Block src *string } func (m Block) GetContent() string { if m.src == nil || m.Start < 0 || m.End > len(*m.src) || m.Start > m.End { return "" } return (*m.src)[m.Start:m.End] } // MatchBlocksFuzzy finds anchor positions for all BlockMatching blocks using // Levenshtein distance (tolerant matching), then returns ONLY the BlockData // segments as gaps between those anchors. func MatchBlocksFuzzy(markdown string, templateBlocks []template.Block, maxDist float64) []Block { var out []Block lastIndex := 0 for i, b := range templateBlocks { if b.Type == template.MatchingBlock { start, end := FuzzyFind(markdown, lastIndex, b.GetContent(), 0.2) if end != -1 { if i > 0 { previousBlock := templateBlocks[i-1] if previousBlock.Type == template.DataBlock { out = append(out, Block{ Start: lastIndex, End: start, Block: previousBlock, src: &markdown, }) } } lastIndex = end } } } // Handle the last block if len(templateBlocks) > 0 { lastBlock := templateBlocks[len(templateBlocks)-1] if lastBlock.Type == template.DataBlock { out = append(out, Block{ Start: lastIndex, End: len(markdown), Block: lastBlock, src: &markdown, }) } } return out } func FuzzyFind(haystack string, from int, needle string, maxDist float64) (start int, end int) { bestStart, bestEnd, bestDist := -1, -1, math.MaxFloat64 needleLen := len(needle) minWindow := max(1, needleLen-int(float64(needleLen)*maxDist)-1) maxWindow := needleLen + int(float64(needleLen)*maxDist) + 1 for i := from; i < len(haystack); i++ { for windowSize := minWindow; windowSize <= maxWindow && i+windowSize <= len(haystack); windowSize++ { sub := haystack[i : i+windowSize] dist := levenshtein.Distance(sub, needle, nil) maxLen := max(needleLen, windowSize) norm := float64(dist)/float64(maxLen) + float64(utils.Abs(windowSize-needleLen))*0.01/float64(maxLen) if norm < bestDist { bestStart, bestEnd, bestDist = i, i+windowSize, norm } } if bestDist <= 0.05 { break } } if bestStart >= 0 && bestDist <= maxDist+0.01 { return bestStart, bestEnd } return -1, -1 }