153 lines
5.2 KiB
Ruby
153 lines
5.2 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
RSpec.describe DiscourseAi::Embeddings::Vector do
  # Shared behaviour that is run once per embeddings provider.
  #
  # The including context must supply:
  #   * `vdef` - the fabricated embedding definition under test.
  #   * `stub_vector_mapping(text, expected_embedding)` - registers the
  #     provider-specific stub that maps `text` to `expected_embedding`.
  shared_examples "generates and store embeddings using a vector definition" do
    subject(:vector) { described_class.new(vdef) }

    # Two different constant-valued embeddings, sized to the definition under test.
    let(:expected_embedding_1) { [0.0038493] * vdef.dimensions }
    let(:expected_embedding_2) { [0.0037684] * vdef.dimensions }

    before { SiteSetting.ai_embeddings_selected_model = vdef.id }

    let(:topics_schema) { DiscourseAi::Embeddings::Schema.for(Topic) }
    let(:posts_schema) { DiscourseAi::Embeddings::Schema.for(Post) }

    fab!(:topic)
    fab!(:post) { Fabricate(:post, post_number: 1, topic: topic) }
    # Second post of the same topic; kept so the fabricated data set is unchanged.
    fab!(:post2) { Fabricate(:post, post_number: 2, topic: topic) }

    describe "#vector_from" do
      it "creates a vector from a given string" do
        text = "This is a piece of text"
        stub_vector_mapping(text, expected_embedding_1)

        expect(vector.vector_from(text)).to eq(expected_embedding_1)
      end
    end

    describe "#generate_representation_from" do
      it "creates a vector from a topic and stores it in the database" do
        text = vdef.prepare_target_text(topic)
        stub_vector_mapping(text, expected_embedding_1)

        vector.generate_representation_from(topic)

        expect(topics_schema.find_by_embedding(expected_embedding_1).topic_id).to eq(topic.id)
      end

      it "creates a vector from a post and stores it in the database" do
        # Stub the text of the very record that gets embedded below, mirroring
        # the topic example above (this previously prepared the text of `post2`).
        text = vdef.prepare_target_text(post)
        stub_vector_mapping(text, expected_embedding_1)

        vector.generate_representation_from(post)

        expect(posts_schema.find_by_embedding(expected_embedding_1).post_id).to eq(post.id)
      end
    end

    describe "#gen_bulk_reprensentations" do
      fab!(:topic_2) { Fabricate(:topic) }
      fab!(:post_2_1) { Fabricate(:post, post_number: 1, topic: topic_2) }
      fab!(:post_2_2) { Fabricate(:post, post_number: 2, topic: topic_2) }

      it "creates a vector for each object in the relation" do
        text = vdef.prepare_target_text(topic)

        text2 = vdef.prepare_target_text(topic_2)

        stub_vector_mapping(text, expected_embedding_1)
        stub_vector_mapping(text2, expected_embedding_2)

        vector.gen_bulk_reprensentations(Topic.where(id: [topic.id, topic_2.id]))

        # NOTE(review): only the first topic's stored embedding is asserted;
        # the row for `topic_2` / `expected_embedding_2` is not checked here.
        expect(topics_schema.find_by_embedding(expected_embedding_1).topic_id).to eq(topic.id)
      end

      it "does nothing if passed record has no content" do
        expect { vector.gen_bulk_reprensentations([Topic.new]) }.not_to raise_error
      end

      it "doesn't ask for a new embedding if digest is the same" do
        text = vdef.prepare_target_text(topic)
        stub_vector_mapping(text, expected_embedding_1)

        original_vector_gen = Time.zone.parse("2021-06-04 10:00")

        # First generation happens at a known, frozen time so the stored row's
        # `updated_at` can be compared after the second run.
        freeze_time(original_vector_gen) do
          vector.gen_bulk_reprensentations(Topic.where(id: [topic.id]))
        end
        # check vector exists
        expect(topics_schema.find_by_embedding(expected_embedding_1).topic_id).to eq(topic.id)

        # Second run over the same, unchanged topic (outside the frozen time).
        vector.gen_bulk_reprensentations(Topic.where(id: [topic.id]))

        # The timestamp still equals the first run's time, i.e. the row was not rewritten.
        expect(topics_schema.find_by_target(topic).updated_at).to eq_time(original_vector_gen)
      end
    end
  end

  context "with open_ai as the provider" do
    fab!(:vdef) { Fabricate(:open_ai_embedding_def) }

    # Registers the OpenAI stub for `text`, using the model name configured on `vdef`.
    def stub_vector_mapping(text, expected_embedding)
      EmbeddingsGenerationStubs.openai_service(
        vdef.lookup_custom_param("model_name"),
        text,
        expected_embedding,
      )
    end

    it_behaves_like "generates and store embeddings using a vector definition"

    context "when matryoshka_dimensions is enabled" do
      it "passes the dimensions param" do
        shorter_dimensions = 10
        vdef.update!(dimensions: shorter_dimensions, matryoshka_dimensions: true)
        text = "This is a piece of text"
        short_expected_embedding = [0.0038493] * shorter_dimensions

        # The stub is registered with an extra `dimensions` argument; presumably it
        # only matches requests carrying it - confirm in EmbeddingsGenerationStubs.
        EmbeddingsGenerationStubs.openai_service(
          vdef.lookup_custom_param("model_name"),
          text,
          short_expected_embedding,
          extra_args: {
            dimensions: shorter_dimensions,
          },
        )

        expect(described_class.new(vdef).vector_from(text)).to eq(short_expected_embedding)
      end
    end
  end

  context "with hugging_face as the provider" do
    fab!(:vdef) { Fabricate(:embedding_definition) }

    # Registers the Hugging Face stub that maps `text` to `expected_embedding`.
    def stub_vector_mapping(text, expected_embedding)
      EmbeddingsGenerationStubs.hugging_face_service(text, expected_embedding)
    end

    it_behaves_like "generates and store embeddings using a vector definition"
  end

  context "with google as the provider" do
    fab!(:vdef) { Fabricate(:gemini_embedding_def) }

    # Registers the Gemini stub for `text`, passing the API key configured on `vdef`.
    def stub_vector_mapping(text, expected_embedding)
      EmbeddingsGenerationStubs.gemini_service(vdef.api_key, text, expected_embedding)
    end

    it_behaves_like "generates and store embeddings using a vector definition"
  end

  context "with cloudflare as the provider" do
    fab!(:vdef) { Fabricate(:cloudflare_embedding_def) }

    # Registers the Cloudflare stub that maps `text` to `expected_embedding`.
    def stub_vector_mapping(text, expected_embedding)
      EmbeddingsGenerationStubs.cloudflare_service(text, expected_embedding)
    end

    it_behaves_like "generates and store embeddings using a vector definition"
  end
end
|