diff --git a/assets/javascripts/discourse/lib/diff-streamer.gjs b/assets/javascripts/discourse/lib/diff-streamer.gjs index 9ca0748b..170cb1af 100644 --- a/assets/javascripts/discourse/lib/diff-streamer.gjs +++ b/assets/javascripts/discourse/lib/diff-streamer.gjs @@ -4,6 +4,8 @@ import loadJSDiff from "discourse/lib/load-js-diff"; import { parseAsync } from "discourse/lib/text"; const DEFAULT_CHAR_TYPING_DELAY = 30; +const STREAMING_DIFF_TRUNCATE_THRESHOLD = 0.1; +const STREAMING_DIFF_TRUNCATE_BUFFER = 10; export default class DiffStreamer { @tracked isStreaming = false; @@ -122,6 +124,53 @@ export default class DiffStreamer { return maybeUnfinishedImage || maybeUnfinishedLink; } + // this is public to make testing easier + // is makes it easier to do a "streaming diff" where we want to ensure diff + // is focused on the beginning of the text instead of taking the entire body + // into account. + // This ensures that we do not make mistakes and present wildly different diffs + // to what we would stablize on at the end of the stream. + streamingDiff(original, suggestion) { + const maxDiffLength = Math.floor( + suggestion.length + + suggestion.length * STREAMING_DIFF_TRUNCATE_THRESHOLD + + STREAMING_DIFF_TRUNCATE_BUFFER + ); + const head = original.slice(0, maxDiffLength); + const tail = original.slice(maxDiffLength); + + const diffArray = this.jsDiff.diffWordsWithSpace(head, suggestion); + + if (tail.length > 0) { + // if last in the array is added, and previous is removed then flip them + let last = diffArray[diffArray.length - 1]; + let secondLast = diffArray[diffArray.length - 2]; + + if (last.added && secondLast.removed) { + diffArray.pop(); + diffArray.pop(); + diffArray.push(last); + diffArray.push(secondLast); + + last = secondLast; + secondLast = diffArray[diffArray.length - 2]; + } + + if (!last.removed) { + last = { + added: false, + removed: true, + value: "", + }; + diffArray.push(last); + } + + last.value = last.value + tail; + } + + return diffArray; + } + async #streamNextChar() { if (this.currentWordIndex < this.words.length) { const currentToken = this.words[this.currentWordIndex]; @@ -134,7 +183,7 @@ export default class DiffStreamer { this.currentWordIndex++; this.currentCharIndex = 0; - const originalDiff = this.jsDiff.diffWordsWithSpace( + const originalDiff = this.streamingDiff( this.selectedText, this.suggestion ); diff --git a/test/javascripts/unit/lib/diff-streamer-test.js b/test/javascripts/unit/lib/diff-streamer-test.js new file mode 100644 index 00000000..d01c29c4 --- /dev/null +++ b/test/javascripts/unit/lib/diff-streamer-test.js @@ -0,0 +1,67 @@ +import { module, test } from "qunit"; +import DiffStreamer from "discourse/plugins/discourse-ai/discourse/lib/diff-streamer"; + +module("Unit | Lib | diff-streamer", function () { + test("streamingDiff correctly handles trivial cases", async function (assert) { + const originalText = "helo world"; + const targetText = "hello world"; + + const diffStreamer = new DiffStreamer(this.originalTextContent); + await diffStreamer.loadJSDiff(); + + const diffResult = diffStreamer.streamingDiff(originalText, targetText); + + const expectedDiff = [ + { count: 1, added: false, removed: true, value: "helo" }, + { count: 1, added: true, removed: false, value: "hello" }, + { count: 2, added: false, removed: false, value: " world" }, + ]; + + assert.deepEqual( + diffResult, + expectedDiff, + "Diff result should match the expected structure" + ); + }); + + test("streamingDiff correctly consolidates and handles diff drift", async function (assert) { + const originalText = + "This is todone, but I want to can why.\n\nWe\n\nSEO Tags supports a `canonical_url` override. I tried a few possibilities there. The one I wanted to work, ex: `https://www.discourse.org/de`, appended an extra `/de` on the URL, only in the deploy b"; + const targetText = "This is to-done"; + + const diffStreamer = new DiffStreamer(this.originalTextContent); + await diffStreamer.loadJSDiff(); + + const diffResult = diffStreamer.streamingDiff(originalText, targetText); + + // Verify the diff result is an array with the expected structure + assert.true(Array.isArray(diffResult), "Diff result should be an array"); + assert.strictEqual( + diffResult.length, + 3, + "Expecting exactly three parts in the diff result" + ); + + assert.strictEqual( + diffResult[0].value, + "This is ", + "First part should be unchanged" + ); + + assert.strictEqual( + diffResult[1].value, + "to-done", + "Second part should be an insertion" + ); + + assert.true(diffResult[1].added, "Second part should be an insertion"); + + assert.strictEqual( + diffResult[2].value.length, + 235, + "Third part should include all text" + ); + + assert.true(diffResult[2].removed, "Third part should be a removal"); + }); +});