QUESTIONS

Predictive Analytics: Which sentiments and emotions behind the humor of sitcoms correlate with higher audience ratings?

Descriptive Analytics: How have the themes and humor styles in comedy shows evolved over time?

LOAD IN LIBRARIES

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
# install.packages("writexl")
library(writexl)

1) MODERN FAMILY VISUALIZATIONS

# Read the Modern Family sentiment table from the working directory.
fam_sent <- read.csv(file = "Cleaned Modern Family Sentiment Dataset.csv")
# Uncomment either line to inspect the column names or print the full table:
# names(fam_sent)
# fam_sent
MODERN FAMILY SENTIMENT ANALYSIS METHOD VADER AND TEXTBLOB: LINE GRAPH
# Overlay the TextBlob and VADER sentiment trends so the two methods can be
# compared episode by episode.
library(ggplot2)

# `group = 1` makes each layer connect all of its points into one line; if
# Season.Episode is read as text (a discrete axis), every episode would
# otherwise form its own one-point group. This matches the single-method
# plots, which also set `group = 1`.
# `linewidth` replaces the `size` argument that ggplot2 3.4.0 deprecated for
# lines.
ggplot(fam_sent, aes(x = `Season.Episode`)) +
  geom_line(
    aes(y = `Average.TextBlob.Sentiment`, color = "TextBlob", group = 1),
    linewidth = 1
  ) +
  geom_line(
    aes(y = `Average.VADER.Sentiment`, color = "VADER", group = 1),
    linewidth = 1
  ) +
  labs(title = "Sentiment Trends in Modern Family",
       x = "Season's with Episodes",
       y = "Average Sentiment Score",
       color = "Sentiment Analysis Method") +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).

MODERN FAMILY SENTIMENT ANALYSIS METHOD VADER: LINE GRAPH

# VADER sentiment per episode as a single line (`group = 1` joins all points).
# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(fam_sent, aes(x = `Season.Episode`, y = `Average.VADER.Sentiment`, group = 1)) +
  geom_line(color = "steelblue", linewidth = 1) +
  labs(title = "VADER Sentiment Trends Over Episodes",
       x = "Season.Episode",
       y = "Average VADER Sentiment Score") +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).

MODERN FAMILY SENTIMENT ANALYSIS METHOD TEXTBLOB: LINE GRAPH

# TextBlob sentiment per episode as a single line (`group = 1` joins all
# points). `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(fam_sent, aes(x = `Season.Episode`, y = `Average.TextBlob.Sentiment`, group = 1)) +
  geom_line(color = "darkred", linewidth = 1) +
  labs(title = "TextBlob Sentiment Trends Over Episodes",
       x = "Season.Episode",
       y = "Average TextBlob Sentiment Score") +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).

# MODERN FAMILY DATASET
# BOXPLOT OF THE VADER SENTIMENT SCORES, ONE BOX PER SEASON
# Season is derived from the first two characters of Season.Episode.
# NOTE(review): assumes that two-character prefix identifies the season --
# confirm against the actual Season.Episode format.
fam_sent <- mutate(fam_sent, Season = substr(`Season.Episode`, 1, 2))

ggplot(
  data = fam_sent,
  mapping = aes(x = Season, y = `Average.VADER.Sentiment`, fill = Season)
) +
  geom_boxplot() +
  labs(
    title = "Distribution of Vader Sentiment by Season",
    x = "Season",
    y = "VADER Sentiment Score"
  ) +
  theme_minimal()

# List the columns, which now include the derived Season.
colnames(fam_sent)
## [1] "Season.Episode"             "Average.TextBlob.Sentiment"
## [3] "Average.VADER.Sentiment"    "VADER.Positive.Lines"      
## [5] "VADER.Negative.Lines"       "VADER.Neutral.Lines"       
## [7] "Season"
# Boxplot of the TextBlob sentiment scores, one box per season.
# The y-axis label names TextBlob (it previously read "VADER Sentiment Score",
# copied over from the VADER boxplot).
ggplot(fam_sent, aes(x = Season, y = `Average.TextBlob.Sentiment`, fill = Season)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Distribution of Textblob Sentiment by Season",
       x = "Season",
       y = "TextBlob Sentiment Score")

2) THE OFFICE VISUALIZATIONS

# Read the emotion scores for The Office and preview the first six rows.
office_emo <- read.csv(file = "office_emotion_data.csv")
head(office_emo, n = 6)
##        Emotion Score Episode Season
## 1 anticipation    49       1      1
## 2          joy    49       1      1
## 3     positive   109       1      1
## 4     surprise    22       1      1
## 5        trust    70       1      1
## 6     negative    76       1      1
colnames(office_emo)
## [1] "Emotion" "Score"   "Episode" "Season"
library(ggplot2)

# Stacked bars: one bar per season, with the Score values stacked by Emotion.
ggplot(
  data = office_emo,
  mapping = aes(x = factor(Season), y = Score, fill = Emotion)
) +
  geom_col(position = "stack") +
  theme_minimal() +
  labs(
    title = "Emotion Distribution Per Season",
    x = "Season",
    y = "Total Emotion Score",
    fill = "Emotion"
  )

### EMOTIONAL TRENDS BY EPISODE IN A PARTICULAR SEASON

library(ggplot2)
library(dplyr)

# Season to plot; change this value to look at a different season.
season_number <- 1

# Keep only the rows that belong to the chosen season.
office_emo_filtered <- filter(office_emo, Season == season_number)

# Stacked bars: one bar per episode, with the Score values stacked by Emotion.
ggplot(
  data = office_emo_filtered,
  mapping = aes(x = factor(Episode), y = Score, fill = Emotion)
) +
  geom_col(position = "stack") +
  theme_minimal() +
  # Rotate the episode labels for readability.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = paste("Emotion Distribution in Season", season_number),
    x = "Episode",
    y = "Total Emotion Score",
    fill = "Emotion"
  )

3) SEINFELD VISUALIZATIONS

# Read the emotion scores for Seinfeld and preview the first six rows.
seinfeld_emo <- read.csv(file = "seinfeld_emotion_data.csv")
head(seinfeld_emo, n = 6)
##        Emotion Score EpisodeNo Season season
## 1         fear    61         1      1     NA
## 2     negative   119         1      1     NA
## 3      sadness    62         1      1     NA
## 4 anticipation   120         1      1     NA
## 5     positive   140         1      1     NA
## 6        anger    43         1      1     NA
names(seinfeld_emo)
## [1] "Emotion"   "Score"     "EpisodeNo" "Season"    "season"
library(ggplot2)
library(dplyr)

season_numbers <- 1 # number can be changed to other season #'s

# Keep only the rows for the chosen season. The filter uses the capitalised
# `Season` column; the lowercase `season` column is NA in the previewed rows.
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Season == season_numbers)

# One line per emotion across the episodes of the chosen season.
# The title is built from `season_numbers` so it stays correct when the season
# is changed (it previously hard-coded "Season 1" and then appended the number).
# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(seinfeld_emo_filtered, aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)) +
  geom_line(linewidth = 1) +
  labs(title = paste("Emotion Trends in Season", season_numbers, "of Seinfeld"),
       x = "Episode",
       y = "Emotion Score",
       color = "Emotion") +
  theme_minimal()

library(ggplot2)
library(dplyr)

season_numbers <- 1 # number can be changed to other season #'s

# Keep only the "positive" and "negative" rows for the chosen season.
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Season == season_numbers, Emotion %in% c("negative", "positive"))

# One line each for the positive and negative scores across the season.
# Fixes the "Postive" typo in the title and uses `linewidth`, which replaces
# the `size` argument deprecated for lines in ggplot2 3.4.0.
ggplot(seinfeld_emo_filtered, aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)) +
  geom_line(linewidth = 1) +
  labs(title = paste("Positive and Negative Emotion Trends in Season", season_numbers, "of Seinfeld"),
       x = "Episode",
       y = "Emotion Score",
       color = "Emotion") +
  theme_minimal()

library(ggplot2)
library(dplyr)

# Keep only the "positive" and "negative" rows, across every season.
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Emotion %in% c("negative", "positive"))

# One panel per season, each with its own x-axis range (`scales = "free_x"`).
# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(seinfeld_emo_filtered, aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Season, scales = "free_x") +  # Creates separate plots for each season
  labs(title = "Emotion Trends in Seinfeld Across All Seasons",
       x = "Episode",
       y = "Emotion Score",
       color = "Emotion") +
  theme_minimal()