From c3ca741353d3d31341ac650b28919e82705d063f Mon Sep 17 00:00:00 2001 From: Nat Date: Fri, 13 Jun 2025 00:29:52 +0800 Subject: [PATCH] ai-featureify the rest of the translators --- config/locales/server.en.yml | 11 ++- config/settings.yml | 25 +++++-- lib/configuration/feature.rb | 18 +++++ lib/personas/bot_context.rb | 10 ++- lib/personas/persona.rb | 3 + lib/personas/post_raw_translator.rb | 58 +++++++++++++++ lib/personas/short_text_translator.rb | 50 +++++++++++++ lib/personas/topic_title_translator.rb | 57 +++++++++++++++ lib/translation/base_translator.rb | 71 ++++++++++--------- lib/translation/post_localizer.rb | 9 +-- lib/translation/post_raw_translator.rb | 38 +--------- lib/translation/short_text_translator.rb | 33 +-------- lib/translation/topic_localizer.rb | 4 +- lib/translation/topic_title_translator.rb | 40 +---------- spec/lib/translation/base_translator_spec.rb | 42 ++++------- .../lib/translation/language_detector_spec.rb | 2 +- spec/lib/translation/post_localizer_spec.rb | 3 +- spec/lib/translation/topic_localizer_spec.rb | 18 ++--- 18 files changed, 298 insertions(+), 194 deletions(-) create mode 100644 lib/personas/post_raw_translator.rb create mode 100644 lib/personas/short_text_translator.rb create mode 100644 lib/personas/topic_title_translator.rb diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 99ee5c17..06760420 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -374,7 +374,16 @@ en: description: "Default persona powering the Helper's image caption feature" locale_detection: name: "Locale detection" - description: "Powers the translation feature by detecting the locale of a given text" + description: "Powers the translation feature by detecting the locale of a given text (posts, titles, etc.)" + post_raw_translator: + name: "Post translator" + description: "Powers the translation feature by translating posts containing Discourse Markdown" + topic_title_translator: + name: "Topic 
title translator" + description: "Powers the translation feature by translating topic titles" + short_text_translator: + name: "Short text translator" + description: "Powers the translation feature by acting as a generic text translator, used for short texts like category names or tags" topic_not_found: "Summary unavailable, topic not found!" summarizing: "Summarizing topic" diff --git a/config/settings.yml b/config/settings.yml index e49e9ebb..9e99696a 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -460,6 +460,26 @@ discourse_ai: enum: "DiscourseAi::Configuration::LlmEnumerator" validator: "DiscourseAi::Configuration::LlmValidator" area: "ai-features/translation" + ai_translation_locale_detection_persona: + default: "-27" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + area: "ai-features/translation" + ai_translation_post_raw_translator_persona: + default: "-28" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + area: "ai-features/translation" + ai_translation_topic_title_translator_persona: + default: "-29" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + area: "ai-features/translation" + ai_translation_short_text_translator_persona: + default: "-30" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + area: "ai-features/translation" ai_translation_backfill_rate: default: 0 min: 0 @@ -480,11 +500,6 @@ discourse_ai: client: false hidden: true area: "ai-features/translation" - ai_translation_locale_detection_persona: - default: "-27" - type: enum - enum: "DiscourseAi::Configuration::PersonaEnumerator" - area: "ai-features/translation" inferred_concepts_enabled: default: false diff --git a/lib/configuration/feature.rb b/lib/configuration/feature.rb index 1b319aae..13ce9e11 100644 --- a/lib/configuration/feature.rb +++ b/lib/configuration/feature.rb @@ -126,6 +126,24 @@ module DiscourseAi DiscourseAi::Configuration::Module::TRANSLATION_ID, 
DiscourseAi::Configuration::Module::TRANSLATION, ), + new( + "post_raw_translator", + "ai_translation_post_raw_translator_persona", + DiscourseAi::Configuration::Module::TRANSLATION_ID, + DiscourseAi::Configuration::Module::TRANSLATION, + ), + new( + "topic_title_translator", + "ai_translation_topic_title_translator_persona", + DiscourseAi::Configuration::Module::TRANSLATION_ID, + DiscourseAi::Configuration::Module::TRANSLATION, + ), + new( + "short_text_translator", + "ai_translation_short_text_translator_persona", + DiscourseAi::Configuration::Module::TRANSLATION_ID, + DiscourseAi::Configuration::Module::TRANSLATION, + ), ] end diff --git a/lib/personas/bot_context.rb b/lib/personas/bot_context.rb index 3853e2bd..61adbb6f 100644 --- a/lib/personas/bot_context.rb +++ b/lib/personas/bot_context.rb @@ -25,6 +25,7 @@ module DiscourseAi def initialize( post: nil, + topic: nil, participants: nil, user: nil, skip_tool_details: nil, @@ -70,7 +71,14 @@ module DiscourseAi @topic_id = post.topic_id @private_message = post.topic.private_message? @participants ||= post.topic.allowed_users.map(&:username).join(", ") if @private_message - @user = post.user + @user ||= post.user + end + + if topic + @topic_id ||= topic.id + @private_message ||= topic.private_message? 
+ @participants ||= topic.allowed_users.map(&:username).join(", ") if @private_message + @user ||= topic.user end end diff --git a/lib/personas/persona.rb b/lib/personas/persona.rb index 57d8bb27..faff12b9 100644 --- a/lib/personas/persona.rb +++ b/lib/personas/persona.rb @@ -65,6 +65,9 @@ module DiscourseAi Translator => -25, ImageCaptioner => -26, LocaleDetection => -27, + PostRawTranslator => -28, + TopicTitleTranslator => -29, + ShortTextTranslator => -30, } end diff --git a/lib/personas/post_raw_translator.rb b/lib/personas/post_raw_translator.rb new file mode 100644 index 00000000..98aed9de --- /dev/null +++ b/lib/personas/post_raw_translator.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class PostRawTranslator < Persona + def self.default_enabled + false + end + + def system_prompt + <<~PROMPT.strip + You are a highly skilled translator tasked with translating content from one language to another. Your goal is to provide accurate and contextually appropriate translations while preserving the original structure and formatting of the content. Follow these instructions carefully: + + Translation Instructions: + 1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines. + 2. Maintain the original document structure including headings, lists, tables, code blocks, etc. + 3. Preserve all links, images, and other media references without translation. + 4. Handle code snippets appropriately: + - Do not translate variable names, functions, or syntax within code blocks (```). + - Translate comments within code blocks. + 5. For technical terminology: + - Provide the accepted target language term if it exists. + - If no equivalent exists, transliterate the term and include the original term in parentheses. + 6. For ambiguous terms or phrases, choose the most contextually appropriate translation. + 7. Do not add any content besides the translation. + 8. 
Ensure the translation only contains the original language and the target language. + + The text to translate will be provided in JSON format with the following structure: + {"content": "Text to translate", "target_locale": "Target language code"} + + Output your translation in the following JSON format: + {"translation": "Your translated text here"} + + Here are three examples of correct translations: + + Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"Spanish"} + Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"} + + Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. 
**Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_locale":"French"} + Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. **Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. 
**Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."} + + Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_locale": "English"} + Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"} + + Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response. + PROMPT + end + + def response_format + [{ "key" => "translation", "type" => "string" }] + end + + def temperature + 0.3 + end + end + end +end \ No newline at end of file diff --git a/lib/personas/short_text_translator.rb b/lib/personas/short_text_translator.rb new file mode 100644 index 00000000..418074e8 --- /dev/null +++ b/lib/personas/short_text_translator.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ShortTextTranslator < Persona + def self.default_enabled + false + end + + def system_prompt + <<~PROMPT.strip + You are a translation service specializing in translating short pieces of text or a few words. + These words may be things like a name, description, or title. 
Adhere to the following guidelines: + + 1. Keep proper nouns and technical terms in their original language + 2. Keep the translated content close to the original length + 3. Translation maintains the original meaning + 4. Preserving any Markdown, HTML elements, links, parenthesis, or newlines + + The text to translate will be provided in JSON format with the following structure: + {"content": "Text to translate", "target_locale": "Target language code"} + + Provide your translation in the following JSON format: + {"translation": "target_locale translation here"} + + Here are three examples of correct translation + + Original: {"content":"Japan", "target_locale":"es"} + Correct translation: {"translation": "Japón"} + + Original: {"content":"Cats and Dogs", "target_locale":"zh_CN"} + Correct translation: {"translation": "猫和狗"} + + Original: {"content": "Q&A", "target_locale": "pt"} + Correct translation: {"translation": "Perguntas e Respostas"} + + Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format. + PROMPT + end + + def response_format + [{ "key" => "translation", "type" => "string" }] + end + + def temperature + 0.3 + end + end + end +end \ No newline at end of file diff --git a/lib/personas/topic_title_translator.rb b/lib/personas/topic_title_translator.rb new file mode 100644 index 00000000..6c05d32d --- /dev/null +++ b/lib/personas/topic_title_translator.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class TopicTitleTranslator < Persona + def self.default_enabled + false + end + + def system_prompt + <<~PROMPT.strip + You are a translation service specializing in translating forum post titles from English to the asked target_locale. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines: + + 1. 
Translate the given title from English to target_locale asked. + 2. Keep proper nouns and technical terms in their original language. + 3. Attempt to keep the translated title length close to the original when possible. + 4. Ensure the translation maintains the original meaning and tone. + + To complete this task: + + 1. Read and understand the title carefully. + 2. Identify any proper nouns or technical terms that should remain untranslated. + 3. Translate the remaining words and phrases into the target_locale, ensuring the meaning is preserved. + 4. Adjust the translation if necessary to keep the length similar to the original title. + 5. Review your translation for accuracy and naturalness in the target_locale. + + The text to translate will be provided in JSON format with the following structure: + {"content": "Title to translate", "target_locale": "Target language code"} + + Provide your translation in the following JSON format: + {"translation": "Your target_locale translation here"} + + Here are three examples of correct translation + + Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"es"} + Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"} + + Original: {"content":"Toyota announces revolutionary battery technology", "target_locale":"fr"} + Correct translation: {"translation": "Toyota annonce une technologie de batteries révolutionnaire"} + + Original: {"content": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_locale": "en"} + Correct translation: {"translation": "Heathrow closed: flight disruption expected to continue in coming days, says London airport management"} + + Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the title now and provide your answer in the specified JSON format. 
+ PROMPT + end + + def response_format + [{ "key" => "translation", "type" => "string" }] + end + + def temperature + 0.3 + end + end + end +end \ No newline at end of file diff --git a/lib/translation/base_translator.rb b/lib/translation/base_translator.rb index d2129b34..c275b843 100644 --- a/lib/translation/base_translator.rb +++ b/lib/translation/base_translator.rb @@ -3,30 +3,47 @@ module DiscourseAi module Translation class BaseTranslator - def initialize(text:, target_locale:, topic_id: nil, post_id: nil) + def initialize(text:, target_locale:, topic: nil, post: nil) @text = text @target_locale = target_locale - @topic_id = topic_id - @post_id = post_id + @topic = topic + @post = post end def translate - prompt = - DiscourseAi::Completions::Prompt.new( - prompt_template, - messages: [{ type: :user, content: formatted_content, id: "user" }], - topic_id: @topic_id, - post_id: @post_id, + return nil if !SiteSetting.ai_translation_enabled + if (ai_persona = AiPersona.find_by(id: persona_setting)).blank? + return nil + end + + persona_klass = ai_persona.class_instance + persona = persona_klass.new + + llm_model = LlmModel.find_by(id: preferred_llm_model(persona_klass)) + return nil if llm_model.blank? 
+ + bot = + DiscourseAi::Personas::Bot.as( + ai_persona.user || Discourse.system_user, + persona: persona, + model: llm_model, ) - structured_output = - DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_translation_model).generate( - prompt, - user: Discourse.system_user, + context = + DiscourseAi::Personas::BotContext.new( + user: ai_persona.user || Discourse.system_user, + skip_tool_details: true, feature_name: "translation", - response_format: response_format, + messages: [{ type: :user, content: formatted_content }], + topic: @topic, + post: @post, ) + structured_output = nil + bot.reply(context) do |partial, _, type| + structured_output = partial if type == :structured_output + end + structured_output&.read_buffered_property(:translation) end @@ -34,31 +51,15 @@ module DiscourseAi { content: @text, target_locale: @target_locale }.to_json end - def response_format - { - type: "json_schema", - json_schema: { - name: "reply", - schema: { - type: "object", - properties: { - translation: { - type: "string", - }, - }, - required: ["translation"], - additionalProperties: false, - }, - strict: true, - }, - } - end - private - def prompt_template + def persona_setting raise NotImplementedError end + + def preferred_llm_model(persona_klass) + persona_klass.default_llm_id || SiteSetting.ai_translation_model&.split(":")&.last + end end end end diff --git a/lib/translation/post_localizer.rb b/lib/translation/post_localizer.rb index 61154374..7c2f6c69 100644 --- a/lib/translation/post_localizer.rb +++ b/lib/translation/post_localizer.rb @@ -10,14 +10,7 @@ module DiscourseAi translated_raw = ContentSplitter .split(post.raw) - .map do |text| - PostRawTranslator.new( - text:, - target_locale:, - topic_id: post.topic_id, - post_id: post.id, - ).translate - end + .map { |text| PostRawTranslator.new(text:, target_locale:, post:).translate } .join("") localization = diff --git a/lib/translation/post_raw_translator.rb b/lib/translation/post_raw_translator.rb index 
a7272cc0..9c742b8e 100644 --- a/lib/translation/post_raw_translator.rb +++ b/lib/translation/post_raw_translator.rb @@ -3,42 +3,10 @@ module DiscourseAi module Translation class PostRawTranslator < BaseTranslator - PROMPT_TEMPLATE = <<~TEXT.freeze - You are a highly skilled translator tasked with translating content from one language to another. Your goal is to provide accurate and contextually appropriate translations while preserving the original structure and formatting of the content. Follow these instructions carefully: + private - Translation Instructions: - 1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines. - 2. Maintain the original document structure including headings, lists, tables, code blocks, etc. - 3. Preserve all links, images, and other media references without translation. - 4. Handle code snippets appropriately: - - Do not translate variable names, functions, or syntax within code blocks (```). - - Translate comments within code blocks. - 5. For technical terminology: - - Provide the accepted target language term if it exists. - - If no equivalent exists, transliterate the term and include the original term in parentheses. - 6. For ambiguous terms or phrases, choose the most contextually appropriate translation. - 7. Do not add any content besides the translation. - 8. Ensure the translation only contains the original language and the target language. 
- - Output your translation in the following JSON format: - {"translation": "Your TARGET_LOCALE translation here"} - - Here are three examples of correct translations: - - Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"Spanish"} - Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"} - - Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_locale":"French"} - Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. 
**Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. **Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."} - - Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_locale": "English"} - Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"} - - Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response. 
- TEXT - - private def prompt_template - PROMPT_TEMPLATE + def persona_setting + SiteSetting.ai_translation_post_raw_translator_persona end end end diff --git a/lib/translation/short_text_translator.rb b/lib/translation/short_text_translator.rb index eeaa1523..99a71c1c 100644 --- a/lib/translation/short_text_translator.rb +++ b/lib/translation/short_text_translator.rb @@ -3,37 +3,10 @@ module DiscourseAi module Translation class ShortTextTranslator < BaseTranslator - PROMPT_TEMPLATE = <<~TEXT.freeze - You are a translation service specializing in translating short pieces of text or a few words. - These words may be things like a name, description, or title. Adhere to the following guidelines: + private - 1. Keep proper nouns and technical terms in their original language - 2. Keep the translated content close to the original length - 3. Translation maintains the original meaning - 4. Preserving any Markdown, HTML elements, links, parenthesis, or newlines - - Provide your translation in the following JSON format: - - - {"translation": "target_locale translation here"} - - - Here are three examples of correct translation - - Original: {"content":"Japan", "target_locale":"es"} - Correct translation: {"translation": "Japón"} - - Original: {"name":"Cats and Dogs", "target_locale":"zh_CN"} - Correct translation: {"translation": "猫和狗"} - - Original: {"name": "Q&A", "target_locale": "pt"} - Correct translation: {"translation": "Perguntas e Respostas"} - - Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format. 
- TEXT - - private def prompt_template - PROMPT_TEMPLATE + def persona_setting + SiteSetting.ai_translation_short_text_translator_persona end end end diff --git a/lib/translation/topic_localizer.rb b/lib/translation/topic_localizer.rb index 49aa7b9c..19253518 100644 --- a/lib/translation/topic_localizer.rb +++ b/lib/translation/topic_localizer.rb @@ -9,9 +9,9 @@ module DiscourseAi target_locale = target_locale.to_s.sub("-", "_") translated_title = - TopicTitleTranslator.new(text: topic.title, target_locale:, topic_id: topic.id).translate + TopicTitleTranslator.new(text: topic.title, target_locale:, topic:).translate translated_excerpt = - ShortTextTranslator.new(text: topic.excerpt, target_locale:, topic_id: topic.id).translate + PostRawTranslator.new(text: topic.excerpt, target_locale:, topic:).translate localization = TopicLocalization.find_or_initialize_by(topic_id: topic.id, locale: target_locale) diff --git a/lib/translation/topic_title_translator.rb b/lib/translation/topic_title_translator.rb index 80c81e6a..c611f066 100644 --- a/lib/translation/topic_title_translator.rb +++ b/lib/translation/topic_title_translator.rb @@ -3,44 +3,10 @@ module DiscourseAi module Translation class TopicTitleTranslator < BaseTranslator - PROMPT_TEMPLATE = <<~TEXT.freeze - You are a translation service specializing in translating forum post titles from English to the asked target_locale. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines: + private - 1. Translate the given title from English to target_locale asked. - 2. Keep proper nouns and technical terms in their original language. - 3. Attempt to keep the translated title length close to the original when possible. - 4. Ensure the translation maintains the original meaning and tone. - - To complete this task: - - 1. Read and understand the title carefully. - 2. Identify any proper nouns or technical terms that should remain untranslated. - 3. 
Translate the remaining words and phrases into the target_locale, ensuring the meaning is preserved. - 4. Adjust the translation if necessary to keep the length similar to the original title. - 5. Review your translation for accuracy and naturalness in the target_locale. - - Provide your translation in the following JSON format: - - - {"translation": "Your target_locale translation here"} - - - Here are three examples of correct translation - - Original: {"title":"New Update for Minecraft Adds Underwater Temples", "target_locale":"es"} - Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"} - - Original: {"title":"Toyota announces revolutionary battery technology", "target_locale":"fr"} - Correct translation: {"translation": "Toyota annonce une technologie de batteries révolutionnaire"} - - Original: {"title": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_locale": "en"} - Correct translation: {"translation": "Heathrow closed: flight disruption expected to continue in coming days, says London airport management"} - - Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the title now and provide your answer in the specified JSON format. 
- TEXT - - private def prompt_template - PROMPT_TEMPLATE + def persona_setting + SiteSetting.ai_translation_topic_title_translator_persona end end end diff --git a/spec/lib/translation/base_translator_spec.rb b/spec/lib/translation/base_translator_spec.rb index 6baae0d9..8c36175b 100644 --- a/spec/lib/translation/base_translator_spec.rb +++ b/spec/lib/translation/base_translator_spec.rb @@ -3,25 +3,34 @@ require "rails_helper" describe DiscourseAi::Translation::BaseTranslator do + let!(:persona) do + AiPersona.find( + DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::PostRawTranslator], + ) + end + before do Fabricate(:fake_model).tap do |fake_llm| SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}") end + + SiteSetting.ai_translation_enabled = true end describe ".translate" do let(:text) { "cats are great" } let(:target_locale) { "de" } let(:llm_response) { "hur dur hur dur!" } + fab!(:post) it "creates the correct prompt" do post_translator = - DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, topic_id: 1) + DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, post:) allow(DiscourseAi::Completions::Prompt).to receive(:new).with( - DiscourseAi::Translation::PostRawTranslator::PROMPT_TEMPLATE, - messages: [{ type: :user, content: post_translator.formatted_content, id: "user" }], - topic_id: 1, - post_id: nil, + persona.system_prompt, + messages: [{ type: :user, content: post_translator.formatted_content }], + post_id: post.id, + topic_id: post.topic_id, ).and_call_original DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do @@ -29,29 +38,6 @@ describe DiscourseAi::Translation::BaseTranslator do end end - it "sends the translation prompt to the selected ai helper model" do - mock_prompt = instance_double(DiscourseAi::Completions::Prompt) - mock_llm = instance_double(DiscourseAi::Completions::Llm) - post_translator = 
DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:) - - structured_output = - DiscourseAi::Completions::StructuredOutput.new({ translation: { type: "string" } }) - structured_output << { translation: llm_response }.to_json - - allow(DiscourseAi::Completions::Prompt).to receive(:new).and_return(mock_prompt) - allow(DiscourseAi::Completions::Llm).to receive(:proxy).with( - SiteSetting.ai_translation_model, - ).and_return(mock_llm) - allow(mock_llm).to receive(:generate).with( - mock_prompt, - user: Discourse.system_user, - feature_name: "translation", - response_format: post_translator.response_format, - ).and_return(structured_output) - - post_translator.translate - end - it "returns the translation from the llm's response" do DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do expect( diff --git a/spec/lib/translation/language_detector_spec.rb b/spec/lib/translation/language_detector_spec.rb index 0de3235e..e6781f4b 100644 --- a/spec/lib/translation/language_detector_spec.rb +++ b/spec/lib/translation/language_detector_spec.rb @@ -19,7 +19,7 @@ describe DiscourseAi::Translation::LanguageDetector do it "creates the correct prompt" do allow(DiscourseAi::Completions::Prompt).to receive(:new).with( - AiPersona.find_by(id: SiteSetting.ai_translation_locale_detection_persona).system_prompt, + persona.system_prompt, messages: [{ type: :user, content: "meow", id: "user" }], ).and_call_original diff --git a/spec/lib/translation/post_localizer_spec.rb b/spec/lib/translation/post_localizer_spec.rb index 04dd8447..f498e62b 100644 --- a/spec/lib/translation/post_localizer_spec.rb +++ b/spec/lib/translation/post_localizer_spec.rb @@ -13,8 +13,7 @@ describe DiscourseAi::Translation::PostLocalizer do allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with( text: opts[:text], target_locale: opts[:target_locale], - post_id: opts[:post_id] || post.id, - topic_id: opts[:topic_id] || post.topic_id, + post: opts[:post] || post, 
).and_return(mock) allow(mock).to receive(:translate).and_return(opts[:translated]) end diff --git a/spec/lib/translation/topic_localizer_spec.rb b/spec/lib/translation/topic_localizer_spec.rb index 114e07e4..86543bde 100644 --- a/spec/lib/translation/topic_localizer_spec.rb +++ b/spec/lib/translation/topic_localizer_spec.rb @@ -20,17 +20,17 @@ describe DiscourseAi::Translation::TopicLocalizer do allow(DiscourseAi::Translation::TopicTitleTranslator).to receive(:new).with( text: opts[:text], target_locale: opts[:target_locale], - topic_id: opts[:topic_id] || topic.id, + topic: opts[:topic] || topic, ).and_return(mock) allow(mock).to receive(:translate).and_return(opts[:translated]) end - def short_text_translator_stub(opts) - mock = instance_double(DiscourseAi::Translation::ShortTextTranslator) - allow(DiscourseAi::Translation::ShortTextTranslator).to receive(:new).with( + def post_raw_translator_stub(opts) + mock = instance_double(DiscourseAi::Translation::PostRawTranslator) + allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with( text: opts[:text], target_locale: opts[:target_locale], - topic_id: opts[:topic_id] || topic.id, + topic: opts[:topic] || topic, ).and_return(mock) allow(mock).to receive(:translate).and_return(opts[:translated]) end @@ -54,7 +54,7 @@ describe DiscourseAi::Translation::TopicLocalizer do topic_title_translator_stub( { text: topic.title, target_locale: "ja", translated: translated_title }, ) - short_text_translator_stub( + post_raw_translator_stub( { text: topic.excerpt, target_locale: "ja", translated: translated_excerpt }, ) @@ -65,7 +65,7 @@ describe DiscourseAi::Translation::TopicLocalizer do topic_title_translator_stub( { text: topic.title, target_locale: "zh_CN", translated: "这是一个猫主题 :)" }, ) - short_text_translator_stub( + post_raw_translator_stub( { text: topic.excerpt, target_locale: "zh_CN", translated: "这是一个猫主题 :)" }, ) @@ -76,7 +76,7 @@ describe DiscourseAi::Translation::TopicLocalizer do 
topic_title_translator_stub( { text: topic.title, target_locale: "ja", translated: translated_title }, ) - short_text_translator_stub( + post_raw_translator_stub( { text: topic.excerpt, target_locale: "ja", translated: translated_excerpt }, ) @@ -98,7 +98,7 @@ describe DiscourseAi::Translation::TopicLocalizer do topic_title_translator_stub( { text: topic.title, target_locale: "ja", translated: translated_title }, ) - short_text_translator_stub( + post_raw_translator_stub( { text: topic.excerpt, target_locale: "ja", translated: translated_excerpt }, )