FIX: Ensure embeddings database outages are handled gracefully (#80)

The rails_failover middleware intercepts every `PG::ConnectionBad` error and puts the cluster into readonly mode. It has no handling for multiple databases, so an outage of the embeddings database was putting the whole cluster into readonly mode.
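
To make the failure mode concrete, here is a deliberately simplified sketch (an illustration only, not rails_failover's actual implementation; `FailoverMiddleware`, its constructor, and the `on_failure` callback are all hypothetical):

    require "pg"

    # Hypothetical sketch of the failure mode (not rails_failover's real code):
    # any PG::ConnectionBad reaching this layer is treated as a failure of the
    # main database, no matter which database actually raised it.
    class FailoverMiddleware
      def initialize(app, on_failure:)
        @app = app
        @on_failure = on_failure # e.g. a callback that puts the cluster into readonly
      end

      def call(env)
        @app.call(env)
      rescue PG::ConnectionBad
        # There is no way here to tell the embeddings database apart from the
        # main one, so an embeddings-only outage flips the whole cluster.
        @on_failure.call
        raise
      end
    end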

This commit fixes the issue by rescuing `PG::Error` from all AI database accesses and re-raising the errors under a different class, so rails_failover no longer intercepts them. It also adds a spec to ensure that an embeddings database outage does not affect the functionality of the topics/show route.
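
In schematic form, each embeddings-database access is wrapped like this (a sketch of the pattern only: `MissingEmbeddingError` is assumed to be a plain `StandardError` subclass defined elsewhere in the plugin, and `with_embeddings_db` is a hypothetical helper used purely to show the shape of the change):

    require "pg"

    # Assumed definition; the commit references this class but does not define it here.
    class MissingEmbeddingError < StandardError
    end

    # Hypothetical wrapper showing the pattern the diff applies to each query below.
    def with_embeddings_db(context)
      yield
    rescue PG::Error => e
      # Log the failure, then re-raise under a class that rails_failover does not
      # intercept, so the outage stays scoped to the AI feature.
      Rails.logger.error("Error #{e} #{context}")
      raise MissingEmbeddingError
    end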

Co-authored-by: David Taylor <david@taylorhq.com>
Author: Rafael dos Santos Silva (2023-05-23 18:57:52 -03:00, committed by GitHub)
Parent: b213fe7f94
Commit: cfc6e388df
2 changed files with 73 additions and 39 deletions

Changed file 1 of 2:

@@ -33,6 +33,7 @@ module DiscourseAi
     def asymmetric_semantic_search(model, query, limit, offset)
       embedding = model.generate_embedding(query)

+      begin
       candidate_ids =
         DiscourseAi::Database::Connection
           .db
@@ -47,8 +48,12 @@ module DiscourseAi
             OFFSET :offset
           SQL
           .map(&:topic_id)
-
-      raise StandardError, "No embeddings found for topic #{topic.id}" if candidate_ids.empty?
+      rescue PG::Error => e
+        Rails.logger.error(
+          "Error #{e} querying embeddings for topic #{topic.id} and model #{model.name}",
+        )
+        raise MissingEmbeddingError
+      end

       candidate_ids
     end
@@ -56,6 +61,7 @@ module DiscourseAi
     private

     def query_symmetric_embeddings(model, topic)
+      begin
       DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
         SELECT
           topic_id
@@ -73,15 +79,31 @@ module DiscourseAi
           )
         LIMIT 100
       SQL
+      rescue PG::Error => e
+        Rails.logger.error(
+          "Error #{e} querying embeddings for topic #{topic.id} and model #{model.name}",
+        )
+        raise MissingEmbeddingError
+      end
     end

     def persist_embedding(topic, model, embedding)
-      DiscourseAi::Database::Connection.db.exec(<<~SQL, topic_id: topic.id, embedding: embedding)
+      begin
+        DiscourseAi::Database::Connection.db.exec(
+          <<~SQL,
         INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding)
         VALUES (:topic_id, '[:embedding]')
         ON CONFLICT (topic_id)
         DO UPDATE SET embedding = '[:embedding]'
       SQL
+          topic_id: topic.id,
+          embedding: embedding,
+        )
+      rescue PG::Error => e
+        Rails.logger.error(
+          "Error #{e} persisting embedding for topic #{topic.id} and model #{model.name}",
+        )
+      end
     end
   end
 end
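
The calling side is not part of this diff, but the spec below depends on consumers treating `MissingEmbeddingError` as "no related topics" rather than as a fatal error. A possible shape for such a caller (`related_topic_ids_for` is hypothetical and only illustrates the rescue):

    # Hypothetical caller: degrade to an empty result instead of failing the
    # request, so topics/show keeps returning 200 while the embeddings database
    # is offline.
    def related_topic_ids_for(model, topic)
      query_symmetric_embeddings(model, topic)
    rescue MissingEmbeddingError
      []
    end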

Changed file 2 of 2:

@@ -25,6 +25,7 @@ describe ::TopicsController do
         .returns([topic1.id, topic2.id, topic3.id])

       get("#{topic.relative_url}.json")
+      expect(response.status).to eq(200)

       json = response.parsed_body
       expect(json["suggested_topics"].length).to eq(0)
@@ -38,5 +39,16 @@ describe ::TopicsController do
       expect(json["suggested_topics"].length).to eq(0)
       expect(json["related_topics"].length).to eq(2)
     end
+
+    it "excludes embeddings when the database is offline" do
+      DiscourseAi::Database::Connection.stubs(:db).raises(PG::ConnectionBad)
+
+      get "#{topic.relative_url}.json"
+      expect(response.status).to eq(200)
+
+      json = response.parsed_body
+      expect(json["suggested_topics"].length).not_to eq(0)
+      expect(json["related_topics"].length).to eq(0)
+    end
   end
 end