DEV: Extract configs to a yml file and allow local config (#1142)

This commit is contained in:
Natalie Tay 2025-02-24 13:22:19 +08:00 committed by GitHub
parent 08377bab35
commit 2486e0e2dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 97 additions and 82 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@ node_modules
.env
evals/log
evals/cases
config/eval-llms.local.yml

View File

@ -7,6 +7,7 @@ For more information, please see: https://meta.discourse.org/t/discourse-ai/2592
### Evals

The directory `evals` contains AI evals for the Discourse AI plugin.
You may create a local config by copying `config/eval-llms.yml` to `config/eval-llms.local.yml` and modifying the values.

To run them use:

60
config/eval-llms.yml Normal file
View File

@ -0,0 +1,60 @@
llms:
  gpt-4o:
    display_name: GPT-4o
    name: gpt-4o
    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
    api_key_env: OPENAI_API_KEY
    provider: open_ai
    url: https://api.openai.com/v1/chat/completions
    max_prompt_tokens: 131072
    vision_enabled: true
  gpt-4o-mini:
    display_name: GPT-4o-mini
    name: gpt-4o-mini
    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
    api_key_env: OPENAI_API_KEY
    provider: open_ai
    url: https://api.openai.com/v1/chat/completions
    max_prompt_tokens: 131072
    vision_enabled: true
  claude-3.5-haiku:
    display_name: Claude 3.5 Haiku
    name: claude-3-5-haiku-latest
    tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer
    api_key_env: ANTHROPIC_API_KEY
    provider: anthropic
    url: https://api.anthropic.com/v1/messages
    max_prompt_tokens: 200000
    vision_enabled: false
  claude-3.5-sonnet:
    display_name: Claude 3.5 Sonnet
    name: claude-3-5-sonnet-latest
    tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer
    api_key_env: ANTHROPIC_API_KEY
    provider: anthropic
    url: https://api.anthropic.com/v1/messages
    max_prompt_tokens: 200000
    vision_enabled: true
  gemini-2.0-flash:
    display_name: Gemini 2.0 Flash
    name: gemini-2-0-flash
    tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
    api_key_env: GEMINI_API_KEY
    provider: google
    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash
    max_prompt_tokens: 1000000
    vision_enabled: true
  gemini-2.0-pro-exp:
    display_name: Gemini 2.0 pro
    name: gemini-2-0-pro-exp
    tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
    api_key_env: GEMINI_API_KEY
    provider: google
    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp
    max_prompt_tokens: 1000000
    vision_enabled: true

View File

@ -1,71 +1,23 @@
# frozen_string_literal: true # frozen_string_literal: true
class DiscourseAi::Evals::Llm class DiscourseAi::Evals::Llm
CONFIGS = { def self.configs
"gpt-4o" => { return @configs if @configs
display_name: "GPT-4o",
name: "gpt-4o", yaml_path = File.join(File.dirname(__FILE__), "../../config/eval-llms.yml")
tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer", local_yaml_path = File.join(File.dirname(__FILE__), "../../config/eval-llms.local.yml")
api_key_env: "OPENAI_API_KEY",
provider: "open_ai", configs = YAML.load_file(yaml_path)["llms"] || {}
url: "https://api.openai.com/v1/chat/completions", if File.exist?(local_yaml_path)
max_prompt_tokens: 131_072, local_configs = YAML.load_file(local_yaml_path)["llms"] || {}
vision_enabled: true, configs = configs.merge(local_configs)
}, end
"gpt-4o-mini" => {
display_name: "GPT-4o-mini", @configs = configs
name: "gpt-4o-mini", end
tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
api_key_env: "OPENAI_API_KEY",
provider: "open_ai",
url: "https://api.openai.com/v1/chat/completions",
max_prompt_tokens: 131_072,
vision_enabled: true,
},
"claude-3.5-haiku" => {
display_name: "Claude 3.5 Haiku",
name: "claude-3-5-haiku-latest",
tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
api_key_env: "ANTHROPIC_API_KEY",
provider: "anthropic",
url: "https://api.anthropic.com/v1/messages",
max_prompt_tokens: 200_000,
vision_enabled: false,
},
"claude-3.5-sonnet" => {
display_name: "Claude 3.5 Sonnet",
name: "claude-3-5-sonnet-latest",
tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
api_key_env: "ANTHROPIC_API_KEY",
provider: "anthropic",
url: "https://api.anthropic.com/v1/messages",
max_prompt_tokens: 200_000,
vision_enabled: true,
},
"gemini-2.0-flash" => {
display_name: "Gemini 2.0 Flash",
name: "gemini-2-0-flash",
tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
api_key_env: "GEMINI_API_KEY",
provider: "google",
url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
max_prompt_tokens: 1_000_000,
vision_enabled: true,
},
"gemini-2.0-pro-exp" => {
display_name: "Gemini 2.0 pro",
name: "gemini-2-0-pro-exp",
tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
api_key_env: "GEMINI_API_KEY",
provider: "google",
url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp",
max_prompt_tokens: 1_000_000,
vision_enabled: true,
},
}
def self.print def self.print
CONFIGS configs
.keys .keys
.map do |config_name| .map do |config_name|
begin begin
@ -79,38 +31,39 @@ class DiscourseAi::Evals::Llm
end end
def self.choose(config_name) def self.choose(config_name)
if CONFIGS[config_name].nil? return [] unless configs
CONFIGS if !config_name || !configs[config_name]
configs
.keys .keys
.map do |config_name| .map do |name|
begin begin
new(config_name) new(name)
rescue => e rescue StandardError
puts "Error initializing #{config_name}: #{e}"
nil nil
end end
end end
.compact .compact
elsif !CONFIGS.include?(config_name)
raise "Invalid llm"
else else
[new(config_name)] [new(config_name)]
end end
end end
attr_reader :llm_model attr_reader :llm_model, :llm_proxy, :config_name
attr_reader :llm_proxy
attr_reader :config_name
def initialize(config_name) def initialize(config_name)
config = CONFIGS[config_name].dup config = self.class.configs[config_name].dup
api_key_env = config.delete(:api_key_env) if config["api_key_env"]
if !ENV[api_key_env] api_key_env = config.delete("api_key_env")
unless ENV[api_key_env]
raise "Missing API key for #{config_name}, should be set via #{api_key_env}" raise "Missing API key for #{config_name}, should be set via #{api_key_env}"
end end
config[:api_key] = ENV[api_key_env] config[:api_key] = ENV[api_key_env]
@llm_model = LlmModel.new(config) elsif config["api_key"]
config[:api_key] = config.delete("api_key")
else
raise "No API key or API key env var configured for #{config_name}"
end
@llm_model = LlmModel.new(config.symbolize_keys)
@llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model) @llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model)
@config_name = config_name @config_name = config_name
end end