75 lines
2.5 KiB
Ruby
75 lines
2.5 KiB
Ruby
# frozen_string_literal: true
|
|
#
|
|
describe DiscourseAi::Utils::Research::LlmFormatter do
  fab!(:user) { Fabricate(:user, username: "test_user") }
  fab!(:topic) { Fabricate(:topic, title: "This is a Test Topic", user: user) }
  fab!(:post) { Fabricate(:post, topic: topic, user: user) }

  let(:tokenizer) { DiscourseAi::Tokenizer::OpenAiTokenizer }
  let(:filter) { DiscourseAi::Utils::Research::Filter.new("@#{user.username}") }

  # Builds the formatter under test from the shared filter and tokenizer.
  # Only the per-post token budget differs between examples; the batch
  # budget is fixed at 1000 for all of them.
  def build_formatter(max_tokens_per_post:)
    described_class.new(
      filter,
      max_tokens_per_batch: 1000,
      tokenizer: tokenizer,
      max_tokens_per_post: max_tokens_per_post,
    )
  end

  describe "#truncate_if_needed" do
    it "returns original content when under token limit" do
      formatter = build_formatter(max_tokens_per_post: 100)
      brief = "This is a short post"

      # truncate_if_needed is invoked via `send`, as in the other examples.
      result = formatter.send(:truncate_if_needed, brief)

      expect(result).to eq(brief)
    end

    it "truncates content when over token limit" do
      # 200 space-separated words: well past the 50-token per-post budget below.
      lengthy = Array.new(200, "word").join(" ")
      formatter = build_formatter(max_tokens_per_post: 50)

      truncated = formatter.send(:truncate_if_needed, lengthy)

      expect(truncated).to include("... elided 150 tokens ...")
      expect(truncated).not_to eq(lengthy)

      # The output is expected to be three blank-line-separated parts:
      # leading text, the elision marker, trailing text. Each kept part
      # should be roughly 25 words (half of max_tokens_per_post).
      head, _marker, tail = truncated.split("\n\n")

      expect(head.split(" ").size).to be_within(5).of(25)
      expect(tail.split(" ").size).to be_within(5).of(25)
    end
  end

  describe "#format_post" do
    it "formats posts with truncation for long content" do
      # A post whose raw body is far longer than the per-post token budget.
      body = Array.new(200, "word").join(" ")
      long_post = Fabricate(:post, raw: body, topic: topic, user: user)
      formatter = build_formatter(max_tokens_per_post: 50)

      formatted = formatter.send(:format_post, long_post)

      # Standard header elements: author line and relative post URL.
      author_line = "## Post by #{user.username}"
      url_line = "Post url: /t/-/#{long_post.topic_id}/#{long_post.post_number}"
      expect(formatted).to include(author_line)
      expect(formatted).to include(url_line)

      # The body must carry the truncation marker.
      expect(formatted).to include("... elided 150 tokens ...")
    end
  end
end
|