discourse-ai/lib/utils/diff_utils/simple_diff.rb

179 lines
5.1 KiB
Ruby

# frozen_string_literal: true
module DiscourseAi
module Utils
module DiffUtils
class SimpleDiff
LEVENSHTEIN_THRESHOLD = 2
class Error < StandardError
end
class NoMatchError < Error
end
def self.apply(content, search, replace)
new.apply(content, search, replace)
end
def apply(content, search, replace)
raise ArgumentError, "content cannot be nil" if content.nil?
raise ArgumentError, "search cannot be nil" if search.nil?
raise ArgumentError, "replace cannot be nil" if replace.nil?
lines = content.split("\n")
search_lines = search.split("\n")
# 1. Try exact matching
match_positions =
find_matches(lines, search_lines) { |line, search_line| line == search_line }
# 2. Try stripped matching
if match_positions.empty?
match_positions =
find_matches(lines, search_lines) do |line, search_line|
line.strip == search_line.strip
end
end
# 3. Try fuzzy matching
if match_positions.empty?
match_positions =
find_matches(lines, search_lines) do |line, search_line|
fuzzy_match?(line, search_line)
end
end
# 4. Try block matching as last resort
if match_positions.empty?
if block_matches = find_block_matches(content, search)
return replace_blocks(content, block_matches, replace)
end
end
if match_positions.empty?
raise NoMatchError, "Could not find a match for the search content"
end
# Replace matches in reverse order
match_positions.sort.reverse.each do |pos|
lines.slice!(pos, search_lines.length)
lines.insert(pos, *replace.split("\n"))
end
lines.join("\n")
end
private
def find_matches(lines, search_lines)
matches = []
max_index = lines.length - search_lines.length
(0..max_index).each do |i|
if (0...search_lines.length).all? { |j| yield(lines[i + j], search_lines[j]) }
matches << i
end
end
matches
end
def fuzzy_match?(line, search_line)
return true if line.strip == search_line.strip
s1 = line.lstrip
s2 = search_line.lstrip
levenshtein_distance(s1, s2) <= LEVENSHTEIN_THRESHOLD
end
def levenshtein_distance(s1, s2)
m = s1.length
n = s2.length
d = Array.new(m + 1) { Array.new(n + 1, 0) }
(0..m).each { |i| d[i][0] = i }
(0..n).each { |j| d[0][j] = j }
(1..m).each do |i|
(1..n).each do |j|
cost = s1[i - 1] == s2[j - 1] ? 0 : 1
d[i][j] = [d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost].min
end
end
d[m][n]
end
def find_block_matches(content, search)
content_blocks = extract_blocks(content)
search_blocks = extract_blocks(search)
return nil if content_blocks.empty? || search_blocks.empty?
matches = []
search_blocks.each do |search_block|
content_blocks.each do |content_block|
matches << content_block if content_block[:text] == search_block[:text]
end
end
matches.empty? ? nil : matches
end
def extract_blocks(text)
lines = text.split("\n")
blocks = []
current_block = []
block_start = nil
lines.each_with_index do |line, index|
if line =~ /^[^\s]/
# Save previous block if exists
if !current_block.empty?
current_block << line
blocks << {
start: block_start,
length: current_block.length,
text: current_block.join("\n").strip,
}
current_block = []
else
current_block = [line]
block_start = index
end
else
# Continue current block
current_block << line if current_block.any?
end
end
# Add final block
if !current_block.empty?
blocks << {
start: block_start,
length: current_block.length,
text: current_block.join("\n").strip,
}
end
blocks
end
def replace_blocks(content, blocks, replace)
lines = content.split("\n")
# Sort blocks in reverse order to maintain correct positions
blocks
.sort_by { |b| -b[:start] }
.each_with_index do |block, index|
replacement = index.zero? ? replace : ""
lines.slice!(block[:start], block[:length])
lines.insert(block[:start], *replacement.split("\n"))
end
lines.join("\n")
end
end
end
end
end