From d0198c5c5bedd16a90f3e0fab00086965069e663 Mon Sep 17 00:00:00 2001
From: Roman Rizzi <roman@discourse.org>
Date: Thu, 9 Nov 2023 17:23:25 -0300
Subject: [PATCH] FIX: Changes to the sentiment reports. (#289)

This PR aims to clarify sentiment reports by replacing averages with a count of posts that have one of their values above a threshold (60), meaning we have some level of confidence they are, in fact, positive or negative.

The same thing happens with post emotions, with the difference that a post can have multiple values above the threshold (30). Additionally, we dropped the "Neutral" axis.

We also reworded the tooltip next to each report title, and added an early return to signal we have no data available instead of displaying an empty chart.
---
 config/locales/server.en.yml                  |  4 +-
 lib/modules/sentiment/entry_point.rb          | 57 ++++++++++++++-----
 .../lib/modules/sentiment/entry_point_spec.rb | 27 +++++----
 3 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index a039e468..61caf621 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -102,12 +102,12 @@ en:
   reports:
     overall_sentiment:
       title: "Overall sentiment"
-      description: "The average percentage of positive and negative sentiments in public posts."
+      description: "This chart compares the number of posts classified either positive or negative."
       xaxis: "Positive(%)"
       yaxis: "Date"
     post_emotion:
       title: "Post emotion"
-      description: "The average percentage of emotions present in public posts grouped by the poster's trust level."
+      description: "Number of posts classified with one of the following emotions, grouped by poster's trust level."
       xaxis:
       yaxis:
 
diff --git a/lib/modules/sentiment/entry_point.rb b/lib/modules/sentiment/entry_point.rb
index 37748735..9ae437b6 100644
--- a/lib/modules/sentiment/entry_point.rb
+++ b/lib/modules/sentiment/entry_point.rb
@@ -21,13 +21,21 @@ module DiscourseAi
 
         plugin.add_report("overall_sentiment") do |report|
           report.modes = [:stacked_chart]
+          threshold = 60
+
+          sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
+            COUNT(
+              CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
+            ) AS #{sentiment}_count
+          SQL
 
           grouped_sentiments =
-            DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
+            DB.query(
+              <<~SQL,
             SELECT 
               DATE_TRUNC('day', p.created_at)::DATE AS posted_at,
-              AVG((cr.classification::jsonb->'positive')::integer) AS avg_positive,
-              -AVG((cr.classification::jsonb->'negative')::integer) AS avg_negative
+              #{sentiment_count_sql.call("positive")},
+              -#{sentiment_count_sql.call("negative")}
             FROM 
               classification_results AS cr
             INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@@ -40,9 +48,15 @@ module DiscourseAi
               (p.created_at > :report_start AND p.created_at < :report_end)
             GROUP BY DATE_TRUNC('day', p.created_at)
           SQL
+              report_start: report.start_date,
+              report_end: report.end_date,
+              threshold: threshold,
+            )
 
           data_points = %w[positive negative]
 
+          return report if grouped_sentiments.empty?
+
           report.data =
             data_points.map do |point|
               {
@@ -51,7 +65,7 @@ module DiscourseAi
                 label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"),
                 data:
                   grouped_sentiments.map do |gs|
-                    { x: gs.posted_at, y: gs.public_send("avg_#{point}") }
+                    { x: gs.posted_at, y: gs.public_send("#{point}_count") }
                   end,
               }
             end
@@ -59,18 +73,25 @@ module DiscourseAi
 
         plugin.add_report("post_emotion") do |report|
           report.modes = [:radar]
+          threshold = 30
+
+          emotion_count_clause = Proc.new { |emotion| <<~SQL }
+            COUNT(
+              CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
+            ) AS #{emotion}_count
+          SQL
 
           grouped_emotions =
-            DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
+            DB.query(
+              <<~SQL,
             SELECT 
               u.trust_level AS trust_level,
-              AVG((cr.classification::jsonb->'sadness')::integer) AS avg_sadness,
-              AVG((cr.classification::jsonb->'surprise')::integer) AS avg_surprise,
-              AVG((cr.classification::jsonb->'neutral')::integer) AS avg_neutral,
-              AVG((cr.classification::jsonb->'fear')::integer) AS avg_fear,
-              AVG((cr.classification::jsonb->'anger')::integer) AS avg_anger,
-              AVG((cr.classification::jsonb->'joy')::integer) AS avg_joy,
-              AVG((cr.classification::jsonb->'disgust')::integer) AS avg_disgust
+              #{emotion_count_clause.call("sadness")},
+              #{emotion_count_clause.call("surprise")},
+              #{emotion_count_clause.call("fear")},
+              #{emotion_count_clause.call("anger")},
+              #{emotion_count_clause.call("joy")},
+              #{emotion_count_clause.call("disgust")}
             FROM
               classification_results AS cr
             INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@@ -84,10 +105,16 @@ module DiscourseAi
               (p.created_at > :report_start AND p.created_at < :report_end)
             GROUP BY u.trust_level
           SQL
+              report_start: report.start_date,
+              report_end: report.end_date,
+              threshold: threshold,
+            )
 
-          emotions = %w[sadness surprise neutral fear anger joy disgust]
+          emotions = %w[sadness surprise fear anger joy disgust]
           level_groups = [[0, 1], [2, 3, 4]]
 
+          return report if grouped_emotions.empty?
+
           report.data =
             level_groups.each_with_index.map do |lg, idx|
               tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) }
@@ -102,8 +129,8 @@ module DiscourseAi
                       x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"),
                       y:
                         tl_emotion_avgs.sum do |tl_emotion_avg|
-                          tl_emotion_avg.public_send("avg_#{e}").to_i
-                        end / [tl_emotion_avgs.size, 1].max,
+                          tl_emotion_avg.public_send("#{e}_count").to_i
+                        end,
                     }
                   end,
               }
diff --git a/spec/lib/modules/sentiment/entry_point_spec.rb b/spec/lib/modules/sentiment/entry_point_spec.rb
index b9e81a43..853c1b95 100644
--- a/spec/lib/modules/sentiment/entry_point_spec.rb
+++ b/spec/lib/modules/sentiment/entry_point_spec.rb
@@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
 
     describe "overall_sentiment report" do
       let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
-      let(:negative_classification) { { negative: 60, neutral: 2, positive: 10 } }
+      let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
 
       def sentiment_classification(post, classification)
         Fabricate(:sentiment_classification, target: post, classification: classification)
@@ -73,17 +73,12 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
         sentiment_classification(post_2, negative_classification)
         sentiment_classification(pm, positive_classification)
 
-        expected_positive =
-          (positive_classification[:positive] + negative_classification[:positive]) / 2
-        expected_negative =
-          -(positive_classification[:negative] + negative_classification[:negative]) / 2
-
         report = Report.find("overall_sentiment")
         positive_data_point = report.data[0][:data].first[:y].to_i
         negative_data_point = report.data[1][:data].first[:y].to_i
 
-        expect(positive_data_point).to eq(expected_positive)
-        expect(negative_data_point).to eq(expected_negative)
+        expect(positive_data_point).to eq(1)
+        expect(negative_data_point).to eq(-1)
       end
     end
 
@@ -109,17 +104,25 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
         post_1.user.update!(trust_level: TrustLevel[0])
         post_2.user.update!(trust_level: TrustLevel[3])
         pm.user.update!(trust_level: TrustLevel[0])
+        threshold = 30
 
         emotion_classification(post_1, emotion_1)
         emotion_classification(post_2, emotion_2)
         emotion_classification(pm, emotion_2)
 
         report = Report.find("post_emotion")
-        tl_01_point = report.data[0][:data].first
-        tl_234_point = report.data[1][:data].first
+        tl_01_point = report.data[0][:data]
+        tl_234_point = report.data[1][:data]
 
-        expect(tl_01_point[:y]).to eq(emotion_1[tl_01_point[:x].downcase.to_sym])
-        expect(tl_234_point[:y]).to eq(emotion_2[tl_234_point[:x].downcase.to_sym])
+        tl_01_point.each do |point|
+          expected = emotion_1[point[:x].downcase.to_sym] > threshold ? 1 : 0
+          expect(point[:y]).to eq(expected)
+        end
+
+        tl_234_point.each do |point|
+          expected = emotion_2[point[:x].downcase.to_sym] > threshold ? 1 : 0
+          expect(point[:y]).to eq(expected)
+        end
       end
 
       it "doesn't try to divide by zero if there are no data in a TL group" do