diff --git a/spec/shared/tokenizer.rb b/spec/shared/tokenizer_spec.rb
similarity index 83%
rename from spec/shared/tokenizer.rb
rename to spec/shared/tokenizer_spec.rb
index bfdf6510..47ab900f 100644
--- a/spec/shared/tokenizer.rb
+++ b/spec/shared/tokenizer_spec.rb
@@ -83,3 +83,24 @@ describe DiscourseAi::Tokenizer::OpenAiTokenizer do
     end
   end
 end
+
+# Examples exercising the class-level .size and .truncate calls of AllMpnetBaseV2Tokenizer.
+describe DiscourseAi::Tokenizer::AllMpnetBaseV2Tokenizer do
+  describe "#size" do
+    describe "returns a token count" do
+      it "for a sentence with punctuation and capitalization and numbers" do
+        token_count = described_class.size("Hello, World! 123")
+        expect(token_count).to eq(7)
+      end
+    end
+  end
+
+  describe "#truncate" do
+    it "truncates a sentence" do
+      # The second argument (3) is presumably the token limit -- confirm against the tokenizer.
+      long_text = "foo bar baz qux quux corge grault garply waldo fred plugh xyzzy thud"
+      shortened = described_class.truncate(long_text, 3)
+      expect(shortened).to eq("foo bar")
+    end
+  end
+end