diff --git a/app/models/embedding_definition.rb b/app/models/embedding_definition.rb index 23b37ec4..92caabee 100644 --- a/app/models/embedding_definition.rb +++ b/app/models/embedding_definition.rb @@ -84,7 +84,7 @@ class EmbeddingDefinition < ActiveRecord::Base dimensions: 2000, max_sequence_length: 8191, pg_function: "<=>", - tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer", + tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer", url: "https://api.openai.com/v1/embeddings", provider: OPEN_AI, matryoshka_dimensions: true, @@ -98,7 +98,7 @@ class EmbeddingDefinition < ActiveRecord::Base dimensions: 1536, max_sequence_length: 8191, pg_function: "<=>", - tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer", + tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer", url: "https://api.openai.com/v1/embeddings", provider: OPEN_AI, matryoshka_dimensions: true, @@ -112,7 +112,7 @@ class EmbeddingDefinition < ActiveRecord::Base dimensions: 1536, max_sequence_length: 8191, pg_function: "<=>", - tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer", + tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer", url: "https://api.openai.com/v1/embeddings", provider: OPEN_AI, provider_params: { diff --git a/db/migrate/20250715165701_update_open_ai_embeddings_tokenizer.rb b/db/migrate/20250715165701_update_open_ai_embeddings_tokenizer.rb new file mode 100644 index 00000000..1d8b3888 --- /dev/null +++ b/db/migrate/20250715165701_update_open_ai_embeddings_tokenizer.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true +class UpdateOpenAiEmbeddingsTokenizer < ActiveRecord::Migration[7.2] + def up + execute <<~SQL + UPDATE embedding_definitions + SET tokenizer_class = 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer' + WHERE url LIKE '%https://api.openai.com/%' AND tokenizer_class <> 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer' + SQL + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/spec/system/embeddings/ai_embedding_definition_spec.rb b/spec/system/embeddings/ai_embedding_definition_spec.rb index f20a3327..481fdac9 100644 --- a/spec/system/embeddings/ai_embedding_definition_spec.rb +++ b/spec/system/embeddings/ai_embedding_definition_spec.rb @@ -50,7 +50,7 @@ RSpec.describe "Managing Embeddings configurations", type: :system, js: true do form.field("provider").select(EmbeddingDefinition::OPEN_AI) form.field("url").fill_in("https://api.openai.com/v1/embeddings") form.field("api_key").fill_in(api_key) - form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiTokenizer") + form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiCl100kTokenizer") embed_prefix = "On creation:" search_prefix = "On search:"