FEATURE: Update model names and specs (#1262)
* FEATURE: Update model names and specs - not a bug, but made it explicit that tools and thinking are not a chat thing - updated all models to latest in presets (Gemini and OpenAI) * allow larger context windows
This commit is contained in:
parent
fe7e73a6a4
commit
274a54a324
|
@ -446,15 +446,14 @@ en:
|
|||
anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
|
||||
anthropic-claude-3-5-haiku: "Fast and cost-effective"
|
||||
anthropic-claude-3-opus: "Excels at writing and complex tasks"
|
||||
google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
||||
google-gemini-1-5-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
|
||||
open_ai-gpt-4-turbo: "Previous generation high-intelligence model"
|
||||
open_ai-gpt-4o: "High intelligence model for complex, multi-step tasks"
|
||||
open_ai-gpt-4o-mini: "Affordable and fast small model for lightweight tasks"
|
||||
open_ai-o1-mini: "Cost-efficient reasoning model"
|
||||
open_ai-o1-preview: "Open AI's most capable reasoning model (preview)"
|
||||
google-gemini-2-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
||||
google-gemini-2-0-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
|
||||
google-gemini-2-0-flash-lite: "Cost efficient and low latency model"
|
||||
open_ai-o1: "Open AI's most capable reasoning model"
|
||||
open_ai-o3-mini: "Advanced cost-efficient reasoning model"
|
||||
open_ai-gpt-4-1: "Open AI's flagship model. It is well suited for problem solving across domains"
|
||||
open_ai-gpt-4-1-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases."
|
||||
open_ai-gpt-4-1-nano: "Fastest, most cost-effective GPT-4.1 model."
|
||||
samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model"
|
||||
samba_nova-Meta-Llama-3-3-70B-Instruct: "Powerful multipurpose model"
|
||||
mistral-mistral-large-latest: "Mistral's most powerful model"
|
||||
|
|
|
@ -247,7 +247,7 @@ module DiscourseAi
|
|||
|
||||
system_insts = <<~TEXT.strip
|
||||
You are titlebot. Given a conversation, you will suggest a title.
|
||||
|
||||
|
||||
- You will never respond with anything but the suggested title.
|
||||
- You will always match the conversation language in your title suggestion.
|
||||
- Title will capture the essence of the conversation.
|
||||
|
@ -255,11 +255,11 @@ module DiscourseAi
|
|||
|
||||
instruction = <<~TEXT.strip
|
||||
Given the following conversation:
|
||||
|
||||
|
||||
{{{
|
||||
#{conversation}
|
||||
}}}
|
||||
|
||||
|
||||
Reply only with a title that is 7 words or less.
|
||||
TEXT
|
||||
|
||||
|
@ -336,6 +336,8 @@ module DiscourseAi
|
|||
|
||||
new_prompts =
|
||||
bot.reply(context) do |partial, cancel, placeholder|
|
||||
# no support for tools or thinking by design
|
||||
next if !partial.is_a?(String)
|
||||
if !reply
|
||||
# just eat all leading spaces; we cannot create the message yet
|
||||
next if partial.blank?
|
||||
|
|
|
@ -42,18 +42,25 @@ module DiscourseAi
|
|||
id: "google",
|
||||
models: [
|
||||
{
|
||||
name: "gemini-1.5-pro",
|
||||
name: "gemini-2.5-pro",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest",
|
||||
display_name: "Gemini 1.5 Pro",
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-03-25",
|
||||
display_name: "Gemini 2.5 Pro",
|
||||
},
|
||||
{
|
||||
name: "gemini-1.5-flash",
|
||||
name: "gemini-2.0-flash",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest",
|
||||
display_name: "Gemini 1.5 Flash",
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
|
||||
display_name: "Gemini 2.0 Flash",
|
||||
},
|
||||
{
|
||||
name: "gemini-2.0-flash-lite",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite",
|
||||
display_name: "Gemini 2.0 Flash Lite",
|
||||
},
|
||||
],
|
||||
tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer,
|
||||
|
@ -64,11 +71,9 @@ module DiscourseAi
|
|||
models: [
|
||||
{ name: "o3-mini", tokens: 200_000, display_name: "o3 Mini" },
|
||||
{ name: "o1", tokens: 200_000, display_name: "o1" },
|
||||
{ name: "o1-preview", tokens: 131_072, display_name: "o1 preview" },
|
||||
{ name: "o1-mini", tokens: 131_072, display_name: "o1 Mini" },
|
||||
{ name: "gpt-4o", tokens: 131_072, display_name: "GPT-4 Omni" },
|
||||
{ name: "gpt-4o-mini", tokens: 131_072, display_name: "GPT-4 Omni Mini" },
|
||||
{ name: "gpt-4-turbo", tokens: 131_072, display_name: "GPT-4 Turbo" },
|
||||
{ name: "gpt-4.1", tokens: 800_000, display_name: "GPT-4.1" },
|
||||
{ name: "gpt-4.1-mini", tokens: 800_000, display_name: "GPT-4.1 Mini" },
|
||||
{ name: "gpt-4.1-nano", tokens: 800_000, display_name: "GPT-4.1 Nano" },
|
||||
],
|
||||
tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer,
|
||||
endpoint: "https://api.openai.com/v1/chat/completions",
|
||||
|
|
|
@ -397,7 +397,15 @@ RSpec.describe DiscourseAi::AiBot::Playground do
|
|||
guardian: guardian,
|
||||
)
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["world"]) do |_, _, _prompts|
|
||||
thinking =
|
||||
DiscourseAi::Completions::Thinking.new(
|
||||
message: "I should say hello",
|
||||
signature: "thinking-signature-123",
|
||||
partial: false,
|
||||
)
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
||||
[[thinking, thinking, "wo", "rld", thinking]],
|
||||
) do |_, _, _prompts|
|
||||
ChatSDK::Message.create(
|
||||
channel_id: channel.id,
|
||||
raw: "Hello @#{persona.user.username}",
|
||||
|
|
Loading…
Reference in New Issue