From d0198c5c5bedd16a90f3e0fab00086965069e663 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Thu, 9 Nov 2023 17:23:25 -0300 Subject: [PATCH] FIX: Changes to the sentiment reports. (#289) This PR aims to clarify sentiment reports by replacing averages with a count of posts that have one of their values above a threshold (60), meaning we have some level of confidence they are, in fact, positive or negative. The same thing happens with post emotions, with the difference that a post can have multiple values above the threshold (30). Additionally, we dropped the "Neutral" axis. We also reworded the tooltip next to each report title, and added an early return to signal we have no data available instead of displaying an empty chart. --- config/locales/server.en.yml | 4 +- lib/modules/sentiment/entry_point.rb | 57 ++++++++++++++----- .../lib/modules/sentiment/entry_point_spec.rb | 27 +++++---- 3 files changed, 59 insertions(+), 29 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index a039e468..61caf621 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -102,12 +102,12 @@ en: reports: overall_sentiment: title: "Overall sentiment" - description: "The average percentage of positive and negative sentiments in public posts." + description: "This chart compares the number of posts classified either positive or negative." xaxis: "Positive(%)" yaxis: "Date" post_emotion: title: "Post emotion" - description: "The average percentage of emotions present in public posts grouped by the poster's trust level." + description: "Number of posts classified with one of the following emotions, grouped by poster's trust level." 
xaxis: yaxis: diff --git a/lib/modules/sentiment/entry_point.rb b/lib/modules/sentiment/entry_point.rb index 37748735..9ae437b6 100644 --- a/lib/modules/sentiment/entry_point.rb +++ b/lib/modules/sentiment/entry_point.rb @@ -21,13 +21,21 @@ module DiscourseAi plugin.add_report("overall_sentiment") do |report| report.modes = [:stacked_chart] + threshold = 60 + + sentiment_count_sql = Proc.new { |sentiment| <<~SQL } + COUNT( + CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END + ) AS #{sentiment}_count + SQL grouped_sentiments = - DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date) + DB.query( + <<~SQL, SELECT DATE_TRUNC('day', p.created_at)::DATE AS posted_at, - AVG((cr.classification::jsonb->'positive')::integer) AS avg_positive, - -AVG((cr.classification::jsonb->'negative')::integer) AS avg_negative + #{sentiment_count_sql.call("positive")}, + -#{sentiment_count_sql.call("negative")} FROM classification_results AS cr INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post' @@ -40,9 +48,15 @@ module DiscourseAi (p.created_at > :report_start AND p.created_at < :report_end) GROUP BY DATE_TRUNC('day', p.created_at) SQL + report_start: report.start_date, + report_end: report.end_date, + threshold: threshold, + ) data_points = %w[positive negative] + return report if grouped_sentiments.empty? 
+ report.data = data_points.map do |point| { @@ -51,7 +65,7 @@ module DiscourseAi label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"), data: grouped_sentiments.map do |gs| - { x: gs.posted_at, y: gs.public_send("avg_#{point}") } + { x: gs.posted_at, y: gs.public_send("#{point}_count") } end, } end @@ -59,18 +73,25 @@ module DiscourseAi plugin.add_report("post_emotion") do |report| report.modes = [:radar] + threshold = 30 + + emotion_count_clause = Proc.new { |emotion| <<~SQL } + COUNT( + CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END + ) AS #{emotion}_count + SQL grouped_emotions = - DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date) + DB.query( + <<~SQL, SELECT u.trust_level AS trust_level, - AVG((cr.classification::jsonb->'sadness')::integer) AS avg_sadness, - AVG((cr.classification::jsonb->'surprise')::integer) AS avg_surprise, - AVG((cr.classification::jsonb->'neutral')::integer) AS avg_neutral, - AVG((cr.classification::jsonb->'fear')::integer) AS avg_fear, - AVG((cr.classification::jsonb->'anger')::integer) AS avg_anger, - AVG((cr.classification::jsonb->'joy')::integer) AS avg_joy, - AVG((cr.classification::jsonb->'disgust')::integer) AS avg_disgust + #{emotion_count_clause.call("sadness")}, + #{emotion_count_clause.call("surprise")}, + #{emotion_count_clause.call("fear")}, + #{emotion_count_clause.call("anger")}, + #{emotion_count_clause.call("joy")}, + #{emotion_count_clause.call("disgust")} FROM classification_results AS cr INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post' @@ -84,10 +105,16 @@ module DiscourseAi (p.created_at > :report_start AND p.created_at < :report_end) GROUP BY u.trust_level SQL + report_start: report.start_date, + report_end: report.end_date, + threshold: threshold, + ) - emotions = %w[sadness surprise neutral fear anger joy disgust] + emotions = %w[sadness surprise fear anger joy disgust] level_groups = [[0, 1], [2, 
3, 4]] + return report if grouped_emotions.empty? + report.data = level_groups.each_with_index.map do |lg, idx| tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) } @@ -102,8 +129,8 @@ module DiscourseAi x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"), y: tl_emotion_avgs.sum do |tl_emotion_avg| - tl_emotion_avg.public_send("avg_#{e}").to_i - end / [tl_emotion_avgs.size, 1].max, + tl_emotion_avg.public_send("#{e}_count").to_i + end, } end, } diff --git a/spec/lib/modules/sentiment/entry_point_spec.rb b/spec/lib/modules/sentiment/entry_point_spec.rb index b9e81a43..853c1b95 100644 --- a/spec/lib/modules/sentiment/entry_point_spec.rb +++ b/spec/lib/modules/sentiment/entry_point_spec.rb @@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do describe "overall_sentiment report" do let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } } - let(:negative_classification) { { negative: 60, neutral: 2, positive: 10 } } + let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } } def sentiment_classification(post, classification) Fabricate(:sentiment_classification, target: post, classification: classification) @@ -73,17 +73,12 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do sentiment_classification(post_2, negative_classification) sentiment_classification(pm, positive_classification) - expected_positive = - (positive_classification[:positive] + negative_classification[:positive]) / 2 - expected_negative = - -(positive_classification[:negative] + negative_classification[:negative]) / 2 - report = Report.find("overall_sentiment") positive_data_point = report.data[0][:data].first[:y].to_i negative_data_point = report.data[1][:data].first[:y].to_i - expect(positive_data_point).to eq(expected_positive) - expect(negative_data_point).to eq(expected_negative) + expect(positive_data_point).to eq(1) + expect(negative_data_point).to eq(-1) end end @@ -109,17 +104,25 @@ RSpec.describe 
DiscourseAi::Sentiment::EntryPoint do post_1.user.update!(trust_level: TrustLevel[0]) post_2.user.update!(trust_level: TrustLevel[3]) pm.user.update!(trust_level: TrustLevel[0]) + threshold = 30 emotion_classification(post_1, emotion_1) emotion_classification(post_2, emotion_2) emotion_classification(pm, emotion_2) report = Report.find("post_emotion") - tl_01_point = report.data[0][:data].first - tl_234_point = report.data[1][:data].first + tl_01_point = report.data[0][:data] + tl_234_point = report.data[1][:data] - expect(tl_01_point[:y]).to eq(emotion_1[tl_01_point[:x].downcase.to_sym]) - expect(tl_234_point[:y]).to eq(emotion_2[tl_234_point[:x].downcase.to_sym]) + tl_01_point.each do |point| + expected = emotion_1[point[:x].downcase.to_sym] > threshold ? 1 : 0 + expect(point[:y]).to eq(expected) + end + + tl_234_point.each do |point| + expected = emotion_2[point[:x].downcase.to_sym] > threshold ? 1 : 0 + expect(point[:y]).to eq(expected) + end end it "doesn't try to divide by zero if there are no data in a TL group" do