diff --git a/.gitignore b/.gitignore index e0744210..3f60c8fc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules .env evals/log evals/cases +config/eval-llms.local.yml diff --git a/README.md b/README.md index 6bbc5a8b..3a6a9477 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ For more information, please see: https://meta.discourse.org/t/discourse-ai/2592 ### Evals -The directory `evals` contains AI evals for the Discourse AI plugin. +The directory `evals` contains AI evals for the Discourse AI plugin. +You may create a local config by copying `config/eval-llms.yml` to `config/eval-llms.local.yml` and modifying the values. To run them use: diff --git a/config/eval-llms.yml b/config/eval-llms.yml new file mode 100644 index 00000000..fe48dd24 --- /dev/null +++ b/config/eval-llms.yml @@ -0,0 +1,60 @@ +llms: + gpt-4o: + display_name: GPT-4o + name: gpt-4o + tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer + api_key_env: OPENAI_API_KEY + provider: open_ai + url: https://api.openai.com/v1/chat/completions + max_prompt_tokens: 131072 + vision_enabled: true + + gpt-4o-mini: + display_name: GPT-4o-mini + name: gpt-4o-mini + tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer + api_key_env: OPENAI_API_KEY + provider: open_ai + url: https://api.openai.com/v1/chat/completions + max_prompt_tokens: 131072 + vision_enabled: true + + claude-3.5-haiku: + display_name: Claude 3.5 Haiku + name: claude-3-5-haiku-latest + tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer + api_key_env: ANTHROPIC_API_KEY + provider: anthropic + url: https://api.anthropic.com/v1/messages + max_prompt_tokens: 200000 + vision_enabled: false + + claude-3.5-sonnet: + display_name: Claude 3.5 Sonnet + name: claude-3-5-sonnet-latest + tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer + api_key_env: ANTHROPIC_API_KEY + provider: anthropic + url: https://api.anthropic.com/v1/messages + max_prompt_tokens: 200000 + vision_enabled: true + + gemini-2.0-flash: + display_name: Gemini 
2.0 Flash + name: gemini-2-0-flash + tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer + api_key_env: GEMINI_API_KEY + provider: google + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash + max_prompt_tokens: 1000000 + vision_enabled: true + + gemini-2.0-pro-exp: + display_name: Gemini 2.0 Pro + name: gemini-2-0-pro-exp + tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer + api_key_env: GEMINI_API_KEY + provider: google + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp + max_prompt_tokens: 1000000 + vision_enabled: true diff --git a/evals/lib/llm.rb b/evals/lib/llm.rb index 1e31a50a..bb1d40de 100644 --- a/evals/lib/llm.rb +++ b/evals/lib/llm.rb @@ -1,71 +1,23 @@ # frozen_string_literal: true class DiscourseAi::Evals::Llm - CONFIGS = { - "gpt-4o" => { - display_name: "GPT-4o", - name: "gpt-4o", - tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer", - api_key_env: "OPENAI_API_KEY", - provider: "open_ai", - url: "https://api.openai.com/v1/chat/completions", - max_prompt_tokens: 131_072, - vision_enabled: true, - }, - "gpt-4o-mini" => { - display_name: "GPT-4o-mini", - name: "gpt-4o-mini", - tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer", - api_key_env: "OPENAI_API_KEY", - provider: "open_ai", - url: "https://api.openai.com/v1/chat/completions", - max_prompt_tokens: 131_072, - vision_enabled: true, - }, - "claude-3.5-haiku" => { - display_name: "Claude 3.5 Haiku", - name: "claude-3-5-haiku-latest", - tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer", - api_key_env: "ANTHROPIC_API_KEY", - provider: "anthropic", - url: "https://api.anthropic.com/v1/messages", - max_prompt_tokens: 200_000, - vision_enabled: false, - }, - "claude-3.5-sonnet" => { - display_name: "Claude 3.5 Sonnet", - name: "claude-3-5-sonnet-latest", - tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer", - api_key_env: "ANTHROPIC_API_KEY", - provider: "anthropic", - url: "https://api.anthropic.com/v1/messages", - 
max_prompt_tokens: 200_000, - vision_enabled: true, - }, - "gemini-2.0-flash" => { - display_name: "Gemini 2.0 Flash", - name: "gemini-2-0-flash", - tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer", - api_key_env: "GEMINI_API_KEY", - provider: "google", - url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash", - max_prompt_tokens: 1_000_000, - vision_enabled: true, - }, - "gemini-2.0-pro-exp" => { - display_name: "Gemini 2.0 pro", - name: "gemini-2-0-pro-exp", - tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer", - api_key_env: "GEMINI_API_KEY", - provider: "google", - url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp", - max_prompt_tokens: 1_000_000, - vision_enabled: true, - }, - } + def self.configs + return @configs if @configs + + yaml_path = File.join(File.dirname(__FILE__), "../../config/eval-llms.yml") + local_yaml_path = File.join(File.dirname(__FILE__), "../../config/eval-llms.local.yml") + + configs = YAML.load_file(yaml_path)["llms"] || {} + if File.exist?(local_yaml_path) + local_configs = YAML.load_file(local_yaml_path)["llms"] || {} + configs = configs.merge(local_configs) + end + + @configs = configs + end def self.print - CONFIGS + configs .keys .map do |config_name| begin @@ -79,38 +31,39 @@ class DiscourseAi::Evals::Llm end def self.choose(config_name) - if CONFIGS[config_name].nil? 
- CONFIGS + return [] unless configs + if !config_name || !configs[config_name] + configs .keys - .map do |config_name| + .map do |name| begin - new(config_name) - rescue => e - puts "Error initializing #{config_name}: #{e}" + new(name) + rescue StandardError nil end end .compact - elsif !CONFIGS.include?(config_name) - raise "Invalid llm" else [new(config_name)] end end - attr_reader :llm_model - attr_reader :llm_proxy - attr_reader :config_name + attr_reader :llm_model, :llm_proxy, :config_name def initialize(config_name) - config = CONFIGS[config_name].dup - api_key_env = config.delete(:api_key_env) - if !ENV[api_key_env] - raise "Missing API key for #{config_name}, should be set via #{api_key_env}" + config = self.class.configs[config_name].dup + if config["api_key_env"] + api_key_env = config.delete("api_key_env") + unless ENV[api_key_env] + raise "Missing API key for #{config_name}, should be set via #{api_key_env}" + end + config[:api_key] = ENV[api_key_env] + elsif config["api_key"] + config[:api_key] = config.delete("api_key") + else + raise "No API key or API key env var configured for #{config_name}" end - - config[:api_key] = ENV[api_key_env] - @llm_model = LlmModel.new(config) + @llm_model = LlmModel.new(config.symbolize_keys) @llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model) @config_name = config_name end