105 lines
3.5 KiB
Ruby
105 lines
3.5 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
RSpec.describe DiscourseAi::Embeddings::Schema do
|
|
subject(:posts_schema) { described_class.for(Post, vector: vector) }
|
|
|
|
let(:embeddings) { [0.0038490295] * vector.dimensions }
|
|
fab!(:post) { Fabricate(:post, post_number: 1) }
|
|
let(:digest) { OpenSSL::Digest.hexdigest("SHA1", "test") }
|
|
let(:vector) do
|
|
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2.new(
|
|
DiscourseAi::Embeddings::Strategies::Truncation.new,
|
|
)
|
|
end
|
|
|
|
before { posts_schema.store(post, embeddings, digest) }
|
|
|
|
describe "#find_by_target" do
|
|
it "gets you the post_id of the record that matches the post" do
|
|
embeddings_record = posts_schema.find_by_target(post)
|
|
|
|
expect(embeddings_record.digest).to eq(digest)
|
|
expect(JSON.parse(embeddings_record.embeddings)).to eq(embeddings)
|
|
end
|
|
end
|
|
|
|
describe "#find_by_embedding" do
|
|
it "gets you the record that matches the embedding" do
|
|
embeddings_record = posts_schema.find_by_embedding(embeddings)
|
|
|
|
expect(embeddings_record.digest).to eq(digest)
|
|
expect(embeddings_record.post_id).to eq(post.id)
|
|
end
|
|
end
|
|
|
|
describe "similarity searches" do
|
|
fab!(:post_2) { Fabricate(:post) }
|
|
let(:similar_embeddings) { [0.0038490294] * vector.dimensions }
|
|
|
|
describe "#symmetric_similarity_search" do
|
|
before { posts_schema.store(post_2, similar_embeddings, digest) }
|
|
|
|
it "returns target_id with similar embeddings" do
|
|
similar_records = posts_schema.symmetric_similarity_search(post)
|
|
|
|
expect(similar_records.map(&:post_id)).to contain_exactly(post.id, post_2.id)
|
|
end
|
|
|
|
it "let's you apply additional scopes to filter results further" do
|
|
similar_records =
|
|
posts_schema.symmetric_similarity_search(post) do |builder|
|
|
builder.where("post_id = ?", post_2.id)
|
|
end
|
|
|
|
expect(similar_records.map(&:post_id)).to contain_exactly(post_2.id)
|
|
end
|
|
|
|
it "let's you join on additional tables and combine with additional scopes" do
|
|
similar_records =
|
|
posts_schema.symmetric_similarity_search(post) do |builder|
|
|
builder.join("posts p on p.id = post_id")
|
|
builder.join("topics t on t.id = p.topic_id")
|
|
builder.where("t.id = ?", post_2.topic_id)
|
|
end
|
|
|
|
expect(similar_records.map(&:post_id)).to contain_exactly(post_2.id)
|
|
end
|
|
end
|
|
|
|
describe "#asymmetric_similarity_search" do
|
|
it "returns target_id with similar embeddings" do
|
|
similar_records =
|
|
posts_schema.asymmetric_similarity_search(similar_embeddings, limit: 1, offset: 0)
|
|
|
|
expect(similar_records.map(&:post_id)).to contain_exactly(post.id)
|
|
end
|
|
|
|
it "let's you apply additional scopes to filter results further" do
|
|
similar_records =
|
|
posts_schema.asymmetric_similarity_search(
|
|
similar_embeddings,
|
|
limit: 1,
|
|
offset: 0,
|
|
) { |builder| builder.where("post_id <> ?", post.id) }
|
|
|
|
expect(similar_records.map(&:post_id)).to be_empty
|
|
end
|
|
|
|
it "let's you join on additional tables and combine with additional scopes" do
|
|
similar_records =
|
|
posts_schema.asymmetric_similarity_search(
|
|
similar_embeddings,
|
|
limit: 1,
|
|
offset: 0,
|
|
) do |builder|
|
|
builder.join("posts p on p.id = post_id")
|
|
builder.join("topics t on t.id = p.topic_id")
|
|
builder.where("t.id <> ?", post.topic_id)
|
|
end
|
|
|
|
expect(similar_records.map(&:post_id)).to be_empty
|
|
end
|
|
end
|
|
end
|
|
end
|