diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 27e261b0..129feb4f 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -125,6 +125,7 @@ en: bot_names: gpt-4: "GPT-4" + gpt-4-turbo: "GPT-4 Turbo" gpt-3: 5-turbo: "GPT-3.5" claude-2: "Claude 2" @@ -135,7 +136,7 @@ en: label: "sentiment" title: "Experimental AI-powered sentiment analysis of this person's most recent posts." - + review: types: diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index d59078dd..7bce1c11 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -42,6 +42,7 @@ en: ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)" ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)" ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)" + ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)" ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)" ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header." ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. 
(in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)" diff --git a/config/settings.yml b/config/settings.yml index 1f1b2bd8..ce62a1de 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -95,6 +95,7 @@ discourse_ai: ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions" + ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions" ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations" ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings" ai_openai_organization: "" @@ -256,6 +257,7 @@ discourse_ai: choices: - gpt-3.5-turbo - gpt-4 + - gpt-4-turbo - claude-2 ai_bot_add_to_header: default: true diff --git a/lib/ai_bot/entry_point.rb b/lib/ai_bot/entry_point.rb index 76bfedf1..c231927c 100644 --- a/lib/ai_bot/entry_point.rb +++ b/lib/ai_bot/entry_point.rb @@ -8,14 +8,18 @@ module DiscourseAi GPT4_ID = -110 GPT3_5_TURBO_ID = -111 CLAUDE_V2_ID = -112 + GPT4_TURBO_ID = -113 BOTS = [ [GPT4_ID, "gpt4_bot", "gpt-4"], [GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"], [CLAUDE_V2_ID, "claude_bot", "claude-2"], + [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"], ] def self.map_bot_model_to_user_id(model_name) case model_name + in "gpt-4-turbo" + GPT4_TURBO_ID in "gpt-3.5-turbo" GPT3_5_TURBO_ID in "gpt-4" diff --git a/lib/ai_bot/open_ai_bot.rb b/lib/ai_bot/open_ai_bot.rb index 2a0d3a5f..39f4b0b2 100644 --- a/lib/ai_bot/open_ai_bot.rb +++ b/lib/ai_bot/open_ai_bot.rb @@ -5,6 +5,7 @@ module DiscourseAi class OpenAiBot < Bot def self.can_reply_as?(bot_user) open_ai_bot_ids = [ + DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID, DiscourseAi::AiBot::EntryPoint::GPT4_ID, DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID, ] @@ -23,7 +24,9 @@ module DiscourseAi buffer += @function_size end - if bot_user.id == 
DiscourseAi::AiBot::EntryPoint::GPT4_ID + if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID + 128_000 - buffer + elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID 8192 - buffer else 16_384 - buffer @@ -75,8 +78,15 @@ module DiscourseAi end def model_for(low_cost: false) - return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost - "gpt-3.5-turbo-16k" + if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID + "gpt-3.5-turbo-16k" + elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID + "gpt-4" + else + # not quite released yet, once released we should replace with + # gpt-4-turbo + "gpt-4-1106-preview" + end end def clean_username(username) diff --git a/lib/inference/open_ai_completions.rb b/lib/inference/open_ai_completions.rb index 3c67412c..6affab7d 100644 --- a/lib/inference/open_ai_completions.rb +++ b/lib/inference/open_ai_completions.rb @@ -29,7 +29,9 @@ module ::DiscourseAi url = if model.include?("gpt-4") - if model.include?("32k") + if model.include?("turbo") || model.include?("1106-preview") + URI(SiteSetting.ai_openai_gpt4_turbo_url) + elsif model.include?("32k") URI(SiteSetting.ai_openai_gpt4_32k_url) else URI(SiteSetting.ai_openai_gpt4_url) @@ -134,6 +136,11 @@ module ::DiscourseAi response_raw << chunk + if (leftover + chunk).length < "data: [DONE]".length + leftover += chunk + next + end + (leftover + chunk) .split("\n") .each do |line| diff --git a/spec/shared/inference/openai_completions_spec.rb b/spec/shared/inference/openai_completions_spec.rb index 76e80883..f1d8a854 100644 --- a/spec/shared/inference/openai_completions_spec.rb +++ b/spec/shared/inference/openai_completions_spec.rb @@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" }, { setting_name: "ai_openai_gpt4_url", model: "gpt-4" }, { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" }, + { setting_name: 
"ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" }, ].each do |config| gpt_url = "#{gpt_url_base}/#{config[:model]}" setting_name = config[:setting_name] @@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do expect(log.raw_response_payload).to eq(request_body) end + context "when Webmock has streaming support" do + # See: https://github.com/bblimke/webmock/issues/629 + let(:mock_net_http) do + Class.new(Net::HTTP) do + def request(*) + super do |response| + response.instance_eval do + def read_body(*, &) + @body.each(&) + end + end + + yield response if block_given? + + response + end + end + end + end + + let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) } + let(:original_http) { remove_original_net_http } + let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) } + + let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) } + let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) } + + before do + mock_net_http + remove_original_net_http + stub_net_http + end + + after do + remove_stubbed_net_http + restore_net_http + end + + it "support extremely slow streaming" do + raw_data = <<~TEXT +data: {"choices":[{"delta":{"content":"test"}}]} + +data: {"choices":[{"delta":{"content":"test1"}}]} + +data: {"choices":[{"delta":{"content":"test2"}}]} + +data: [DONE] + TEXT + + chunks = raw_data.split("") + + stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( + status: 200, + body: chunks, + ) + + partials = [] + DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel| + partials << partial + end + + expect(partials.length).to eq(3) + expect(partials).to eq( + [ + { choices: [{ delta: { content: "test" } }] }, + { choices: [{ delta: { content: "test1" } }] }, + { choices: [{ delta: { content: "test2" } }] }, + ], + ) + end + end + it "can operate in streaming mode" do deltas = [ { role: "assistant" },