FEATURE: Update model names and specs (#1262)
* FEATURE: Update model names and specs - not a bug, but made it explicit that tools and thinking are not a chat thing - updated all models to latest in presets (Gemini and OpenAI) * allow larger context windows
This commit is contained in:
parent
fe7e73a6a4
commit
274a54a324
|
@ -446,15 +446,14 @@ en:
|
|||
anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
|
||||
anthropic-claude-3-5-haiku: "Fast and cost-effective"
|
||||
anthropic-claude-3-opus: "Excels at writing and complex tasks"
|
||||
google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
||||
google-gemini-1-5-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
|
||||
open_ai-gpt-4-turbo: "Previous generation high-intelligence model"
|
||||
open_ai-gpt-4o: "High intelligence model for complex, multi-step tasks"
|
||||
open_ai-gpt-4o-mini: "Affordable and fast small model for lightweight tasks"
|
||||
open_ai-o1-mini: "Cost-efficient reasoning model"
|
||||
open_ai-o1-preview: "Open AI's most capable reasoning model (preview)"
|
||||
google-gemini-2-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
||||
google-gemini-2-0-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
|
||||
google-gemini-2-0-flash-lite: "Cost efficient and low latency model"
|
||||
open_ai-o1: "Open AI's most capable reasoning model"
|
||||
open_ai-o3-mini: "Advanced cost-efficient reasoning model"
|
||||
open_ai-gpt-4-1: "Open AI's flagship model. It is well suited for problem solving across domains"
|
||||
open_ai-gpt-4-1-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases."
|
||||
open_ai-gpt-4-1-nano: "Fastest, most cost-effective GPT-4.1 model."
|
||||
samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model"
|
||||
samba_nova-Meta-Llama-3-3-70B-Instruct: "Powerful multipurpose model"
|
||||
mistral-mistral-large-latest: "Mistral's most powerful model"
|
||||
|
|
|
@ -247,7 +247,7 @@ module DiscourseAi
|
|||
|
||||
system_insts = <<~TEXT.strip
|
||||
You are titlebot. Given a conversation, you will suggest a title.
|
||||
|
||||
|
||||
- You will never respond with anything but the suggested title.
|
||||
- You will always match the conversation language in your title suggestion.
|
||||
- Title will capture the essence of the conversation.
|
||||
|
@ -255,11 +255,11 @@ module DiscourseAi
|
|||
|
||||
instruction = <<~TEXT.strip
|
||||
Given the following conversation:
|
||||
|
||||
|
||||
{{{
|
||||
#{conversation}
|
||||
}}}
|
||||
|
||||
|
||||
Reply only with a title that is 7 words or less.
|
||||
TEXT
|
||||
|
||||
|
@ -336,6 +336,8 @@ module DiscourseAi
|
|||
|
||||
new_prompts =
|
||||
bot.reply(context) do |partial, cancel, placeholder|
|
||||
# no support for tools or thinking by design
|
||||
next if !partial.is_a?(String)
|
||||
if !reply
|
||||
# just eat all leading spaces; we cannot create the message yet
|
||||
next if partial.blank?
|
||||
|
|
|
@ -42,18 +42,25 @@ module DiscourseAi
|
|||
id: "google",
|
||||
models: [
|
||||
{
|
||||
name: "gemini-1.5-pro",
|
||||
name: "gemini-2.5-pro",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest",
|
||||
display_name: "Gemini 1.5 Pro",
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-03-25",
|
||||
display_name: "Gemini 2.5 Pro",
|
||||
},
|
||||
{
|
||||
name: "gemini-1.5-flash",
|
||||
name: "gemini-2.0-flash",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest",
|
||||
display_name: "Gemini 1.5 Flash",
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
|
||||
display_name: "Gemini 2.0 Flash",
|
||||
},
|
||||
{
|
||||
name: "gemini-2.0-flash-lite",
|
||||
tokens: 800_000,
|
||||
endpoint:
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite",
|
||||
display_name: "Gemini 2.0 Flash Lite",
|
||||
},
|
||||
],
|
||||
tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer,
|
||||
|
@ -64,11 +71,9 @@ module DiscourseAi
|
|||
models: [
|
||||
{ name: "o3-mini", tokens: 200_000, display_name: "o3 Mini" },
|
||||
{ name: "o1", tokens: 200_000, display_name: "o1" },
|
||||
{ name: "o1-preview", tokens: 131_072, display_name: "o1 preview" },
|
||||
{ name: "o1-mini", tokens: 131_072, display_name: "o1 Mini" },
|
||||
{ name: "gpt-4o", tokens: 131_072, display_name: "GPT-4 Omni" },
|
||||
{ name: "gpt-4o-mini", tokens: 131_072, display_name: "GPT-4 Omni Mini" },
|
||||
{ name: "gpt-4-turbo", tokens: 131_072, display_name: "GPT-4 Turbo" },
|
||||
{ name: "gpt-4.1", tokens: 800_000, display_name: "GPT-4.1" },
|
||||
{ name: "gpt-4.1-mini", tokens: 800_000, display_name: "GPT-4.1 Mini" },
|
||||
{ name: "gpt-4.1-nano", tokens: 800_000, display_name: "GPT-4.1 Nano" },
|
||||
],
|
||||
tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer,
|
||||
endpoint: "https://api.openai.com/v1/chat/completions",
|
||||
|
|
|
@ -397,7 +397,15 @@ RSpec.describe DiscourseAi::AiBot::Playground do
|
|||
guardian: guardian,
|
||||
)
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["world"]) do |_, _, _prompts|
|
||||
thinking =
|
||||
DiscourseAi::Completions::Thinking.new(
|
||||
message: "I should say hello",
|
||||
signature: "thinking-signature-123",
|
||||
partial: false,
|
||||
)
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
||||
[[thinking, thinking, "wo", "rld", thinking]],
|
||||
) do |_, _, _prompts|
|
||||
ChatSDK::Message.create(
|
||||
channel_id: channel.id,
|
||||
raw: "Hello @#{persona.user.username}",
|
||||
|
|
Loading…
Reference in New Issue