Task 1.1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(psych)
library(readxl)
library(tinytex)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(sentimentr)
Data_Fall_2023 <- read_excel("C:/Data - Fall 2023.xlsx", sheet = "Data")
filtered_data <- Data_Fall_2023 %>%
filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
old_data <- filtered_data[, c("initials_box", "target_sex", "sex", "age","initiator_type","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst",
"feelings_neither", "feelings_youaloneforgiven")]
summary_cols <- print(summary(old_data[c("age")]))
## age
## Min. :18.00
## 1st Qu.:26.00
## Median :29.00
## Mean :31.07
## 3rd Qu.:32.00
## Max. :73.00
sex_summary <- print((table(old_data$sex)))
##
## Female Male
## 26 19
sex_summaryt <- print((table(old_data$target_sex)))
##
## Female Male
## 25 20
feelingsyoualone <- print(describe(old_data$feelings_youalone))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 45 -18.36 12.03 -20 -20.08 14.83 -30 10 40 0.89 -0.09 1.79
feelingsyoubothyou <- print(describe(old_data$feelings_bothyoufirst))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 45 7.84 14.45 10 9.19 14.83 -30 30 60 -0.84 -0.09 2.15
t_test_result <- print(t.test(old_data$feelings_youalone, old_data$feelings_bothyoufirst, paired = TRUE))
##
## Paired t-test
##
## data: old_data$feelings_youalone and old_data$feelings_bothyoufirst
## t = -12.625, df = 44, p-value = 3.168e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -30.38239 -22.01761
## sample estimates:
## mean difference
## -26.2
Task 1.2
always_data <- old_data %>%
filter(initiator_type == "always")
conditional_data <- old_data %>%
filter(initiator_type == "conditional")
never_data <- old_data %>%
filter(initiator_type == "never")
# always
t_test_feelings_a <- print(t.test(always_data$feelings_youalone, always_data$feelings_bothyoufirst, paired = TRUE))
##
## Paired t-test
##
## data: always_data$feelings_youalone and always_data$feelings_bothyoufirst
## t = -10.466, df = 20, p-value = 1.464e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -31.46769 -21.00850
## sample estimates:
## mean difference
## -26.2381
print(describe(always_data$feelings_youalone))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 21 -13.48 11.92 -14 -14.24 10.38 -30 10 40 0.56 -0.63 2.6
print(describe(always_data$feelings_bothyoufirst))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 21 12.76 11.05 13 13.53 10.38 -10 30 40 -0.57 -0.52 2.41
# conditional
t_test_feelings_c <- print(t.test(conditional_data$feelings_youalone, conditional_data$feelings_bothyoufirst, paired = TRUE))
##
## Paired t-test
##
## data: conditional_data$feelings_youalone and conditional_data$feelings_bothyoufirst
## t = -5.9957, df = 13, p-value = 4.478e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -39.35217 -18.50497
## sample estimates:
## mean difference
## -28.92857
print(describe(conditional_data$feelings_youalone))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 14 -24.07 11.61 -30 -26.42 0 -30 10 40 1.91 2.6 3.1
print(describe(conditional_data$feelings_bothyoufirst))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 14 4.86 16.97 9.5 6.08 15.57 -30 25 55 -0.65 -0.94 4.53
# never
t_test_feelings_n <- print(t.test(never_data$feelings_youalone, never_data$feelings_bothyoufirst, paired = TRUE))
##
## Paired t-test
##
## data: never_data$feelings_youalone and never_data$feelings_bothyoufirst
## t = -5.666, df = 9, p-value = 0.0003073
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -31.20325 -13.39675
## sample estimates:
## mean difference
## -22.3
print(describe(never_data$feelings_youalone))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 -20.6 9.2 -20 -21 14.83 -30 -8 22 0.18 -1.79 2.91
print(describe(never_data$feelings_bothyoufirst))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 1.7 14.89 9 2.5 8.15 -24 21 45 -0.56 -1.29 4.71
Task 1.3
forgiven_data <- old_data
forgiven_feelings <- forgiven_data$feelings_youaloneforgiven
print(describe(forgiven_feelings))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 45 -13.91 15.62 -19 -15.38 16.31 -30 22 52 0.52 -0.93 2.33
t_test_youaloneforgiven <- print(t.test(forgiven_feelings, forgiven_data$feelings_youalone, paired = TRUE))
##
## Paired t-test
##
## data: forgiven_feelings and forgiven_data$feelings_youalone
## t = 2.1424, df = 44, p-value = 0.03773
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.2635444 8.6253445
## sample estimates:
## mean difference
## 4.444444
t_test_bothyoufirstforgiven <-print(t.test(forgiven_feelings, forgiven_data$feelings_bothyoufirst, paired = TRUE))
##
## Paired t-test
##
## data: forgiven_feelings and forgiven_data$feelings_bothyoufirst
## t = -8.4042, df = 44, p-value = 1.068e-10
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -26.97266 -16.53845
## sample estimates:
## mean difference
## -21.75556
Additional Figures and Anaylsis
avg_feelings <- old_data %>%
group_by(initiator_type) %>%
summarise(avg_feelings_youalone = mean(feelings_youalone, na.rm = TRUE),
avg_feelings_bothyoufirst = mean(feelings_bothyoufirst, na.rm = TRUE))
avg_feelings_long <- avg_feelings %>%
pivot_longer(cols = c(avg_feelings_bothyoufirst, avg_feelings_youalone),
names_to = "scenario",
values_to = "average_feelings")
avg_feelings_long$scenario <- ifelse(avg_feelings_long$scenario != "avg_feelings_youalone","receiving a return apology","not receiving a return apology")
ggplot(avg_feelings_long, aes(x = initiator_type, y = average_feelings, fill = scenario)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.8) +
geom_text(aes(label = sprintf("%.2f", average_feelings), y = ifelse(average_feelings >= 0, average_feelings + 1.5, average_feelings - 1.5)),
position = position_dodge(width = 0.8),
vjust = .3, size = 3) +
labs(title = "Anticipated feelings after apologizing first",
x = "Initiator Type",
y = "Average Feelings") +
scale_fill_manual(values = c("blue","light blue")) +
scale_y_continuous(breaks = seq(-30, 30, by = 10), limits = c(-30, 30), expand = c(0, 0)) +
theme_minimal() +
theme(legend.position = "top",
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.grid.major.y = element_line(color = "gray", size = 0.5),
axis.line = element_line(color = "black"))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# extra analysis task 1
blame <- Data_Fall_2023 %>%
filter(Finished == "TRUE" & passedattn == "yes")
blame <- blame[blame$age != "149", ]
blame_bar <- blame[, c("blame_1","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven", "initiator_type")]
print(describe(blame_bar$blame_1))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 45 38.09 17.11 40 38.3 14.83 0 75 75 -0.1 -0.39 2.55
always_data_b <- blame_bar %>%
filter(initiator_type == "always")
conditional_data_b <- blame_bar %>%
filter(initiator_type == "conditional")
never_data_b <- blame_bar %>%
filter(initiator_type == "never")
print(describe(conditional_data_b$blame_1))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 14 30.79 17.08 30 30.92 14.83 0 60 60 0.14 -0.76 4.56
print(describe(always_data_b$blame_1))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 21 47.1 12.62 50 45.88 14.83 30 75 45 0.6 -0.27 2.75
print(describe(never_data_b$blame_1))
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 29.4 17.32 33 28.25 17.05 9 59 50 0.12 -1.52 5.48
Task 2 Question A
df <- Data_Fall_2023 %>%
filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
df_bar <- old_data[, c("feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven")]
mean_feelings <- colMeans(df_bar)
print(mean_feelings)
## feelings_youalone feelings_bothyoufirst feelings_themalone
## -18.355556 7.844444 -5.533333
## feelings_boththemfirst feelings_neither feelings_youaloneforgiven
## 17.422222 -14.377778 -13.911111
sorted_feelings <- sort(mean_feelings, decreasing = TRUE)
scenario_names <- c("BothThemFirst","BothYouFirst", "ThemAlone",
"YouAloneForgiven", "Neither", "YouAlone")
df_plot <- data.frame(
Scenario = scenario_names,
Average_Feelings = sorted_feelings
)
df_plot$Scenario <- factor(df_plot$Scenario, levels = df_plot$Scenario[order(df_plot$Average_Feelings, decreasing = TRUE)])
se <- apply(df_bar, 2, function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x))))
ggplot(df_plot, aes(x = Scenario, y = Average_Feelings)) +
geom_bar(stat = "identity", fill = "dark green", color = "black") +
geom_errorbar(aes(ymin = Average_Feelings - se, ymax = Average_Feelings + se),
width = 0.25, color = "black", y=0) +
geom_text(aes(label = sprintf("%.2f", Average_Feelings), y = ifelse(Average_Feelings >= 0, Average_Feelings + 3, Average_Feelings - 3.9)),
position = position_dodge(width = 1),
vjust = .3, size = 3)+
labs(title = "Anticipated Feelings in Each Scenario",
y = "Average Feelings",
x = "Scenario") +
ylim(min(sorted_feelings) - 4, max(sorted_feelings) + 4) +
theme_minimal() +
theme(legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank(),
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.grid.major.y = element_line(color = "gray", size = 0.5),
axis.line = element_line(color = "black"))
Task 2 Question B
df_anova <- old_data[, c("initials_box","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven")]
df_long <- gather(df_anova, scenario_names, feelings, -initials_box)
df_long <- df_long %>% arrange(initials_box)
anova_result <- print(summary(aov(feelings ~ scenario_names, data = df_long)))
## Df Sum Sq Mean Sq F value Pr(>F)
## scenario_names 5 45547 9109 48.08 <2e-16 ***
## Residuals 264 50017 189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise_results <- print(pairwise.t.test(df_long$feelings, df_long$scenario_names,
p.adjust.method = "none"))
##
## Pairwise comparisons using t tests with pooled SD
##
## data: df_long$feelings and df_long$scenario_names
##
## feelings_boththemfirst feelings_bothyoufirst
## feelings_bothyoufirst 0.0011 -
## feelings_neither < 2e-16 3.6e-13
## feelings_themalone 7.0e-14 6.3e-06
## feelings_youalone < 2e-16 < 2e-16
## feelings_youaloneforgiven < 2e-16 9.9e-13
## feelings_neither feelings_themalone feelings_youalone
## feelings_bothyoufirst - - -
## feelings_neither - - -
## feelings_themalone 0.0025 - -
## feelings_youalone 0.1716 1.5e-05 -
## feelings_youaloneforgiven 0.8724 0.0042 0.1268
##
## P value adjustment method: none
Task 2 Question C
df <- Data_Fall_2023 %>%
filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
outcome_proportions <- table(df$outcome_binary1) / length(df$outcome_binary1)
df_plot <- as.data.frame(outcome_proportions)
names(df_plot) <- c("Outcome", "Proportion")
ggplot(df_plot, aes(x = Outcome, y = Proportion)) +
geom_bar(stat = "identity", position = "dodge", width = 0.5, fill = "#734f96") +
geom_text(aes(label = paste0(round(Proportion, 2))), vjust = -0.5, position = position_dodge(width = 0.5), size = 3.0) +
labs(title = "Proportion of People Choosing Each Option",
y = "Proportion",
x = "Outcome") +
scale_y_continuous(labels = scales::number_format(scale = 1), breaks = seq(0, 1, by = .2), limits = c(0, 1), expand = c(0, 0)) +
scale_x_discrete(labels = c("I apologize first, then ${e://Field/initials} apologizes." = "I apologize then they apologize",
"Neither I nor ${e://Field/initials} apologizes." = "Neither one of us apologizes"))+
theme_minimal() +
theme(axis.text.x = element_text(hjust = .5),
axis.title.x = element_blank(),
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.grid.major.y = element_line(color = "gray", size = 0.5),
axis.line = element_line(color = "black"))
contingency_table <- table(df$outcome_binary1)
chi_sq_test <- print(chisq.test(contingency_table))
##
## Chi-squared test for given probabilities
##
## data: contingency_table
## X-squared = 13.889, df = 1, p-value = 0.0001939
prop.test(contingency_table)
##
## 1-sample proportions test with continuity correction
##
## data: contingency_table, null probability 0.5
## X-squared = 12.8, df = 1, p-value = 0.0003466
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.6252363 0.8828513
## sample estimates:
## p
## 0.7777778
Task 2 Question D
Data_Fall_2023 <- read_excel("C:/Data - Fall 2023.xlsx", sheet = "Data")
filtered_data <- Data_Fall_2023 %>%
filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
sent_data <- filtered_data[, c("describe", "initiator_type","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst",
"feelings_neither", "feelings_youaloneforgiven")]
sentences <- sent_data
sentiment_scores <- sentiment_by(sentences$describe)
sent_avg <- sentiment_scores$ave_sentiment
sent_data <- cbind(sent_data, sent_avg,
sentiment_category = ifelse(sentiment_scores$ave_sentiment > 0, "positive",
ifelse(sentiment_scores$ave_sentiment < 0, "negative", "neutral")))
avg_sentiment <- sent_data %>%
group_by(initiator_type) %>%
summarize(avg_sentiment = mean(sent_avg))
ggplot(avg_sentiment, aes(x = initiator_type, y = avg_sentiment, fill = initiator_type)) +
geom_bar(stat = "identity") +
labs(title = "Average Sentiment by Initiator Type",
x = "Initiator Type",
y = "Average Sentiment") +
scale_y_continuous(limits = c(-.2, .2), breaks = seq(-.15, .1, by = 0.03)) +
theme_minimal()
describe(sentiment_scores$ave_sentiment)
df_NLP <- sent_data
df_NLP$sentiment <- ifelse(sentiment_scores$ave_sentiment >= 0, "non negative", "negative")
df_NLP_long <- df_NLP %>%
pivot_longer(cols = starts_with(c("feelings_")), names_to = "feeling", values_to = "score")%>%
mutate(feeling = gsub("feelings_", "", feeling)) %>%
group_by(feeling, sentiment) %>%
summarise(avg_score = mean(score, na.rm = TRUE))
## `summarise()` has grouped output by 'feeling'. You can override using the
## `.groups` argument.
ggplot(df_NLP_long, aes(x = feeling, y = avg_score, fill = sentiment)) +
geom_bar(stat = "identity", position = "dodge", width = 0.8) +
geom_text(aes(label = sprintf("%.2f", avg_score), y = ifelse(avg_score >= 0, avg_score + 0.5, avg_score - 0.5)),
position = position_dodge(width = 0.8), vjust = 0.3, size = 3) +
labs(title = "Average Sentiment Score by Scenario",
x = "Scenario",
y = "Average Feelings") +
scale_fill_manual(values = c("non negative" = "#FFBF00", "negative" = "dark red")) +
theme_minimal() +
theme(legend.position = "top",
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.grid.major.y = element_line(color = "gray", size = 0.5),
axis.line = element_line(color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1))