QUESTIONS
Predictive Analytics: What sentiment and emotions behind humor of sitcoms correlate with higher audience ratings?
Descriptive Analytics: How have the themes and humor styles in comedy shows evolved overtime?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# install.packages("writexl")
library(writexl)
# Load the per-episode Modern Family sentiment scores (TextBlob and VADER).
fam_sent <- read.csv("Cleaned Modern Family Sentiment Dataset.csv")
# names(fam_sent)
# fam_sent
# Overlay the TextBlob and VADER average sentiment per episode so the two
# scoring methods can be compared on a single chart.
library(ggplot2)
# group = 1 puts every episode in one group so each layer is drawn as one
# continuous line, matching the single-method trend plots in this script.
# NOTE(review): this matters when Season.Episode is read as text (discrete
# x axis) -- confirm against the CSV.
ggplot(fam_sent, aes(x = Season.Episode, group = 1)) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(
    aes(y = Average.TextBlob.Sentiment, color = "TextBlob"),
    linewidth = 1
  ) +
  geom_line(
    aes(y = Average.VADER.Sentiment, color = "VADER"),
    linewidth = 1
  ) +
  labs(
    title = "Sentiment Trends in Modern Family",
    x = "Seasons with Episodes",
    y = "Average Sentiment Score",
    color = "Sentiment Analysis Method"
  ) +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).
# VADER average sentiment per episode, drawn as one line (group = 1 places
# all episodes in a single group).
ggplot(
  fam_sent,
  aes(x = Season.Episode, y = Average.VADER.Sentiment, group = 1)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(color = "steelblue", linewidth = 1) +
  labs(
    title = "VADER Sentiment Trends Over Episodes",
    x = "Season.Episode",
    y = "Average VADER Sentiment Score"
  ) +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).
# TextBlob average sentiment per episode, drawn as one line (group = 1 places
# all episodes in a single group).
ggplot(
  fam_sent,
  aes(x = Season.Episode, y = Average.TextBlob.Sentiment, group = 1)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(color = "darkred", linewidth = 1) +
  labs(
    title = "TextBlob Sentiment Trends Over Episodes",
    x = "Season.Episode",
    y = "Average TextBlob Sentiment Score"
  ) +
  theme_minimal() +
  # Rotate the episode labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_line()`).
# MODERN FAMILY DATASET
# Box plot of the per-episode VADER sentiment scores, one box per season.
# The season key is the first two characters of Season.Episode.
fam_sent <- mutate(fam_sent, Season = substr(Season.Episode, 1, 2))
ggplot(
  data = fam_sent,
  mapping = aes(x = Season, y = Average.VADER.Sentiment, fill = Season)
) +
  labs(
    title = "Distribution of Vader Sentiment by Season",
    x = "Season",
    y = "VADER Sentiment Score"
  ) +
  geom_boxplot() +
  theme_minimal()
# Confirm the derived Season column is present before plotting by season.
names(fam_sent)
## [1] "Season.Episode" "Average.TextBlob.Sentiment"
## [3] "Average.VADER.Sentiment" "VADER.Positive.Lines"
## [5] "VADER.Negative.Lines" "VADER.Neutral.Lines"
## [7] "Season"
# Box plot of the per-episode TextBlob sentiment scores, one box per season.
ggplot(
  fam_sent,
  aes(x = Season, y = Average.TextBlob.Sentiment, fill = Season)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Distribution of Textblob Sentiment by Season",
    x = "Season",
    # The y axis shows TextBlob scores; the label previously said VADER.
    y = "TextBlob Sentiment Score"
  )
# Load the emotion scores for The Office and preview the first rows and the
# column names.
office_emo <- read.csv("office_emotion_data.csv")
head(office_emo)
## Emotion Score Episode Season
## 1 anticipation 49 1 1
## 2 joy 49 1 1
## 3 positive 109 1 1
## 4 surprise 22 1 1
## 5 trust 70 1 1
## 6 negative 76 1 1
names(office_emo)
## [1] "Emotion" "Score" "Episode" "Season"
library(ggplot2)
# Stacked bars: one bar per season, with the Score values stacked by emotion.
office_emo %>%
  ggplot(aes(x = factor(Season), y = Score, fill = Emotion)) +
  geom_col(position = "stack") +
  theme_minimal() +
  labs(
    title = "Emotion Distribution Per Season",
    x = "Season",
    y = "Total Emotion Score",
    fill = "Emotion"
  )
### EMOTIONAL TRENDS BY EPISODE IN A PARTICULAR SEASON
library(ggplot2)
library(dplyr)
# Season to display; change this value to inspect another season.
season_number <- 1
# Keep only the rows that belong to the chosen season.
office_emo_filtered <- filter(office_emo, Season == season_number)
# Stacked bars: one bar per episode, with the Score values stacked by emotion.
office_emo_filtered %>%
  ggplot(aes(x = factor(Episode), y = Score, fill = Emotion)) +
  geom_col(position = "stack") +
  theme_minimal() +
  # Rotate the episode labels so they stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = paste("Emotion Distribution in Season", season_number),
    x = "Episode",
    y = "Total Emotion Score",
    fill = "Emotion"
  )
library(ggplot2)
library(dplyr)
# Line chart of each emotion's score across the episodes of one season of
# The Office.
season_number <- 1 # number can be changed to other season #'s
office_emo_filtered <- office_emo %>%
  filter(Season == season_number)
ggplot(
  office_emo_filtered,
  aes(x = Episode, y = Score, color = Emotion, group = Emotion)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    # Build the title from season_number so it stays correct when the season
    # is changed (it previously hard-coded "Season 1" and appended the number).
    title = paste("Emotion Trends in Season", season_number, "of The Office"),
    x = "Episode",
    y = "Emotion Score",
    color = "Emotion"
  ) +
  theme_minimal()
# Load the emotion scores for Seinfeld and preview the first rows and the
# column names.
seinfeld_emo <- read.csv("seinfeld_emotion_data.csv")
head(seinfeld_emo)
## Emotion Score EpisodeNo Season season
## 1 fear 61 1 1 NA
## 2 negative 119 1 1 NA
## 3 sadness 62 1 1 NA
## 4 anticipation 120 1 1 NA
## 5 positive 140 1 1 NA
## 6 anger 43 1 1 NA
names(seinfeld_emo)
## [1] "Emotion" "Score" "EpisodeNo" "Season" "season"
library(ggplot2)
library(dplyr)
# Line chart of each emotion's score across the episodes of one season.
# The preview above shows a second, lowercase `season` column that is NA in
# the first rows; the filter below uses the capitalised `Season` column.
season_numbers <- 1 # number can be changed to other season #'s
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Season == season_numbers)
ggplot(
  seinfeld_emo_filtered,
  aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    # Build the title from season_numbers so it stays correct when the season
    # is changed (it previously hard-coded "Season 1" and appended the number).
    title = paste("Emotion Trends in Season", season_numbers, "of Seinfeld"),
    x = "Episode",
    y = "Emotion Score",
    color = "Emotion"
  ) +
  theme_minimal()
library(ggplot2)
library(dplyr)
# Line chart of only the "positive" and "negative" scores across the episodes
# of one Seinfeld season.
season_numbers <- 1 # number can be changed to other season #'s
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Season == season_numbers, Emotion %in% c("negative", "positive"))
ggplot(
  seinfeld_emo_filtered,
  aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    title = paste(
      "Positive and Negative Emotion Trends in Season",
      season_numbers,
      "of Seinfeld"
    ),
    x = "Episode",
    y = "Emotion Score",
    color = "Emotion"
  ) +
  theme_minimal()
library(ggplot2)
library(dplyr)
# Positive and negative scores for every Seinfeld season, one panel per
# season.
seinfeld_emo_filtered <- seinfeld_emo %>%
  filter(Emotion %in% c("negative", "positive"))
ggplot(
  seinfeld_emo_filtered,
  aes(x = EpisodeNo, y = Score, color = Emotion, group = Emotion)
) +
  # linewidth replaces the `size` argument, deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  # Creates separate plots for each season; free_x gives every panel its own
  # x-axis range.
  facet_wrap(~ Season, scales = "free_x") +
  labs(
    title = "Emotion Trends in Seinfeld Across All Seasons",
    x = "Episode",
    y = "Emotion Score",
    color = "Emotion"
  ) +
  theme_minimal()