Data task

Task 1.1

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(psych)
library(readxl)
library(tinytex)
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

library(sentimentr)

Data_Fall_2023 <- read_excel("C:/Data - Fall 2023.xlsx", sheet = "Data")
filtered_data <- Data_Fall_2023 %>% 
  filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
old_data <- filtered_data[, c("initials_box", "target_sex", "sex", "age","initiator_type","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst",
                              "feelings_neither", "feelings_youaloneforgiven")]

summary_cols <- print(summary(old_data[c("age")]))

##       age       
##  Min.   :18.00  
##  1st Qu.:26.00  
##  Median :29.00  
##  Mean   :31.07  
##  3rd Qu.:32.00  
##  Max.   :73.00

sex_summary <- print((table(old_data$sex)))

## 
## Female   Male 
##     26     19

sex_summaryt <- print((table(old_data$target_sex)))

## 
## Female   Male 
##     25     20

feelingsyoualone <- print(describe(old_data$feelings_youalone))

##    vars  n   mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 45 -18.36 12.03    -20  -20.08 14.83 -30  10    40 0.89    -0.09 1.79

feelingsyoubothyou <- print(describe(old_data$feelings_bothyoufirst))

##    vars  n mean    sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 45 7.84 14.45     10    9.19 14.83 -30  30    60 -0.84    -0.09 2.15

t_test_result <- print(t.test(old_data$feelings_youalone, old_data$feelings_bothyoufirst, paired = TRUE))

## 
##  Paired t-test
## 
## data:  old_data$feelings_youalone and old_data$feelings_bothyoufirst
## t = -12.625, df = 44, p-value = 3.168e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -30.38239 -22.01761
## sample estimates:
## mean difference 
##           -26.2

Task 1.2

always_data <- old_data %>%
  filter(initiator_type == "always")
conditional_data <- old_data %>%
  filter(initiator_type == "conditional")
never_data <- old_data %>%
  filter(initiator_type == "never")
# always
t_test_feelings_a <- print(t.test(always_data$feelings_youalone, always_data$feelings_bothyoufirst, paired = TRUE))

## 
##  Paired t-test
## 
## data:  always_data$feelings_youalone and always_data$feelings_bothyoufirst
## t = -10.466, df = 20, p-value = 1.464e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -31.46769 -21.00850
## sample estimates:
## mean difference 
##        -26.2381

print(describe(always_data$feelings_youalone))

##    vars  n   mean    sd median trimmed   mad min max range skew kurtosis  se
## X1    1 21 -13.48 11.92    -14  -14.24 10.38 -30  10    40 0.56    -0.63 2.6

print(describe(always_data$feelings_bothyoufirst))

##    vars  n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 21 12.76 11.05     13   13.53 10.38 -10  30    40 -0.57    -0.52 2.41

# conditional
t_test_feelings_c <- print(t.test(conditional_data$feelings_youalone, conditional_data$feelings_bothyoufirst, paired = TRUE))

## 
##  Paired t-test
## 
## data:  conditional_data$feelings_youalone and conditional_data$feelings_bothyoufirst
## t = -5.9957, df = 13, p-value = 4.478e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -39.35217 -18.50497
## sample estimates:
## mean difference 
##       -28.92857

print(describe(conditional_data$feelings_youalone))

##    vars  n   mean    sd median trimmed mad min max range skew kurtosis  se
## X1    1 14 -24.07 11.61    -30  -26.42   0 -30  10    40 1.91      2.6 3.1

print(describe(conditional_data$feelings_bothyoufirst))

##    vars  n mean    sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 14 4.86 16.97    9.5    6.08 15.57 -30  25    55 -0.65    -0.94 4.53

# never
t_test_feelings_n <- print(t.test(never_data$feelings_youalone, never_data$feelings_bothyoufirst, paired = TRUE))

## 
##  Paired t-test
## 
## data:  never_data$feelings_youalone and never_data$feelings_bothyoufirst
## t = -5.666, df = 9, p-value = 0.0003073
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -31.20325 -13.39675
## sample estimates:
## mean difference 
##           -22.3

print(describe(never_data$feelings_youalone))

##    vars  n  mean  sd median trimmed   mad min max range skew kurtosis   se
## X1    1 10 -20.6 9.2    -20     -21 14.83 -30  -8    22 0.18    -1.79 2.91

print(describe(never_data$feelings_bothyoufirst))

##    vars  n mean    sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 10  1.7 14.89      9     2.5 8.15 -24  21    45 -0.56    -1.29 4.71

Task 1.3

forgiven_data <- old_data
forgiven_feelings <- forgiven_data$feelings_youaloneforgiven
print(describe(forgiven_feelings))

##    vars  n   mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 45 -13.91 15.62    -19  -15.38 16.31 -30  22    52 0.52    -0.93 2.33

t_test_youaloneforgiven <- print(t.test(forgiven_feelings, forgiven_data$feelings_youalone, paired = TRUE))

## 
##  Paired t-test
## 
## data:  forgiven_feelings and forgiven_data$feelings_youalone
## t = 2.1424, df = 44, p-value = 0.03773
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.2635444 8.6253445
## sample estimates:
## mean difference 
##        4.444444

t_test_bothyoufirstforgiven <-print(t.test(forgiven_feelings, forgiven_data$feelings_bothyoufirst, paired = TRUE))

## 
##  Paired t-test
## 
## data:  forgiven_feelings and forgiven_data$feelings_bothyoufirst
## t = -8.4042, df = 44, p-value = 1.068e-10
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -26.97266 -16.53845
## sample estimates:
## mean difference 
##       -21.75556

Additional Figures and Anaylsis

avg_feelings <- old_data %>%
  group_by(initiator_type) %>%
  summarise(avg_feelings_youalone = mean(feelings_youalone, na.rm = TRUE),
            avg_feelings_bothyoufirst = mean(feelings_bothyoufirst, na.rm = TRUE))

avg_feelings_long <- avg_feelings %>%
  pivot_longer(cols = c(avg_feelings_bothyoufirst, avg_feelings_youalone),
               names_to = "scenario",
               values_to = "average_feelings")

avg_feelings_long$scenario <- ifelse(avg_feelings_long$scenario != "avg_feelings_youalone","receiving a return apology","not receiving a return apology")

ggplot(avg_feelings_long, aes(x = initiator_type, y = average_feelings, fill = scenario)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.8) +
  geom_text(aes(label = sprintf("%.2f", average_feelings), y = ifelse(average_feelings >= 0, average_feelings + 1.5, average_feelings - 1.5)), 
            position = position_dodge(width = 0.8), 
            vjust = .3, size = 3) +
  labs(title = "Anticipated feelings after apologizing first",
       x = "Initiator Type",
       y = "Average Feelings") +
  scale_fill_manual(values = c("blue","light blue")) +
  scale_y_continuous(breaks = seq(-30, 30, by = 10), limits = c(-30, 30), expand = c(0, 0)) + 
  theme_minimal() +
  theme(legend.position = "top",
        panel.border = element_rect(color = "black", fill = NA, size = 1),
        panel.grid.major.y = element_line(color = "gray", size = 0.5),
        axis.line = element_line(color = "black"))

## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# extra analysis task 1
blame <- Data_Fall_2023 %>% 
  filter(Finished == "TRUE" & passedattn == "yes")
blame <- blame[blame$age != "149", ]
blame_bar <- blame[, c("blame_1","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven", "initiator_type")]
print(describe(blame_bar$blame_1))

##    vars  n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 45 38.09 17.11     40    38.3 14.83   0  75    75 -0.1    -0.39 2.55

always_data_b <- blame_bar %>%
  filter(initiator_type == "always")
conditional_data_b <- blame_bar %>%
  filter(initiator_type == "conditional")
never_data_b <- blame_bar %>%
  filter(initiator_type == "never")
print(describe(conditional_data_b$blame_1))

##    vars  n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 14 30.79 17.08     30   30.92 14.83   0  60    60 0.14    -0.76 4.56

print(describe(always_data_b$blame_1))

##    vars  n mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 21 47.1 12.62     50   45.88 14.83  30  75    45  0.6    -0.27 2.75

print(describe(never_data_b$blame_1))

##    vars  n mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 10 29.4 17.32     33   28.25 17.05   9  59    50 0.12    -1.52 5.48

Task 2 Question A

df <- Data_Fall_2023 %>% 
  filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
df_bar <- old_data[, c("feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven")]
mean_feelings <- colMeans(df_bar)
print(mean_feelings)

##         feelings_youalone     feelings_bothyoufirst        feelings_themalone 
##                -18.355556                  7.844444                 -5.533333 
##    feelings_boththemfirst          feelings_neither feelings_youaloneforgiven 
##                 17.422222                -14.377778                -13.911111

sorted_feelings <- sort(mean_feelings, decreasing = TRUE)
scenario_names <- c("BothThemFirst","BothYouFirst", "ThemAlone", 
                    "YouAloneForgiven", "Neither", "YouAlone")
df_plot <- data.frame(
  Scenario = scenario_names,
  Average_Feelings = sorted_feelings
)
df_plot$Scenario <- factor(df_plot$Scenario, levels = df_plot$Scenario[order(df_plot$Average_Feelings, decreasing = TRUE)])
se <- apply(df_bar, 2, function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x))))
ggplot(df_plot, aes(x = Scenario, y = Average_Feelings)) +
  geom_bar(stat = "identity", fill = "dark green", color = "black") +
  geom_errorbar(aes(ymin = Average_Feelings - se, ymax = Average_Feelings + se),
                width = 0.25, color = "black", y=0) +
  geom_text(aes(label = sprintf("%.2f", Average_Feelings), y = ifelse(Average_Feelings >= 0, Average_Feelings + 3, Average_Feelings - 3.9)), 
            position = position_dodge(width = 1), 
            vjust = .3, size = 3)+
  labs(title = "Anticipated Feelings in Each Scenario",
       y = "Average Feelings",
       x = "Scenario") +
  ylim(min(sorted_feelings) - 4, max(sorted_feelings) + 4) +
  theme_minimal() +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1),
        panel.grid.major.y = element_line(color = "gray", size = 0.5),
        axis.line = element_line(color = "black"))

Task 2 Question B

df_anova <- old_data[, c("initials_box","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst", "feelings_neither", "feelings_youaloneforgiven")]
df_long <- gather(df_anova, scenario_names, feelings, -initials_box)
df_long <- df_long %>% arrange(initials_box)

anova_result <- print(summary(aov(feelings ~ scenario_names, data = df_long)))

##                 Df Sum Sq Mean Sq F value Pr(>F)    
## scenario_names   5  45547    9109   48.08 <2e-16 ***
## Residuals      264  50017     189                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pairwise_results <- print(pairwise.t.test(df_long$feelings, df_long$scenario_names,
                                          p.adjust.method = "none"))

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  df_long$feelings and df_long$scenario_names 
## 
##                           feelings_boththemfirst feelings_bothyoufirst
## feelings_bothyoufirst     0.0011                 -                    
## feelings_neither          < 2e-16                3.6e-13              
## feelings_themalone        7.0e-14                6.3e-06              
## feelings_youalone         < 2e-16                < 2e-16              
## feelings_youaloneforgiven < 2e-16                9.9e-13              
##                           feelings_neither feelings_themalone feelings_youalone
## feelings_bothyoufirst     -                -                  -                
## feelings_neither          -                -                  -                
## feelings_themalone        0.0025           -                  -                
## feelings_youalone         0.1716           1.5e-05            -                
## feelings_youaloneforgiven 0.8724           0.0042             0.1268           
## 
## P value adjustment method: none

Task 2 Question C

df <- Data_Fall_2023 %>% 
  filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
outcome_proportions <- table(df$outcome_binary1) / length(df$outcome_binary1)
df_plot <- as.data.frame(outcome_proportions)
names(df_plot) <- c("Outcome", "Proportion")

ggplot(df_plot, aes(x = Outcome, y = Proportion)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5, fill = "#734f96") +
  geom_text(aes(label = paste0(round(Proportion, 2))), vjust = -0.5, position = position_dodge(width = 0.5), size = 3.0) +
  labs(title = "Proportion of People Choosing Each Option",
       y = "Proportion",
       x = "Outcome") +
  scale_y_continuous(labels = scales::number_format(scale = 1), breaks = seq(0, 1, by = .2), limits = c(0, 1), expand = c(0, 0)) +
  scale_x_discrete(labels = c("I apologize first, then ${e://Field/initials} apologizes." = "I apologize then they apologize",
                              "Neither I nor ${e://Field/initials} apologizes." = "Neither one of us apologizes"))+
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = .5),
        axis.title.x = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1),
        panel.grid.major.y = element_line(color = "gray", size = 0.5),
        axis.line = element_line(color = "black"))

contingency_table <- table(df$outcome_binary1)
chi_sq_test <- print(chisq.test(contingency_table))

## 
##  Chi-squared test for given probabilities
## 
## data:  contingency_table
## X-squared = 13.889, df = 1, p-value = 0.0001939

prop.test(contingency_table)

## 
##  1-sample proportions test with continuity correction
## 
## data:  contingency_table, null probability 0.5
## X-squared = 12.8, df = 1, p-value = 0.0003466
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6252363 0.8828513
## sample estimates:
##         p 
## 0.7777778

Task 2 Question D

Data_Fall_2023 <- read_excel("C:/Data - Fall 2023.xlsx", sheet = "Data")
filtered_data <- Data_Fall_2023 %>% 
  filter(Finished == "TRUE" & passedattn == "yes" & age != "149")
sent_data <- filtered_data[, c("describe", "initiator_type","feelings_youalone", "feelings_bothyoufirst", "feelings_themalone", "feelings_boththemfirst",
                              "feelings_neither", "feelings_youaloneforgiven")]

sentences <- sent_data
sentiment_scores <- sentiment_by(sentences$describe)
sent_avg <-  sentiment_scores$ave_sentiment
sent_data <- cbind(sent_data, sent_avg, 
                   sentiment_category = ifelse(sentiment_scores$ave_sentiment > 0, "positive",
                                               ifelse(sentiment_scores$ave_sentiment < 0, "negative", "neutral")))


avg_sentiment <- sent_data %>%
  group_by(initiator_type) %>%
  summarize(avg_sentiment = mean(sent_avg))


ggplot(avg_sentiment, aes(x = initiator_type, y = avg_sentiment, fill = initiator_type)) +
  geom_bar(stat = "identity") +
  labs(title = "Average Sentiment by Initiator Type",
       x = "Initiator Type",
       y = "Average Sentiment") +
  scale_y_continuous(limits = c(-.2, .2), breaks = seq(-.15, .1, by = 0.03)) +
  theme_minimal()

describe(sentiment_scores$ave_sentiment)

df_NLP <- sent_data
df_NLP$sentiment <- ifelse(sentiment_scores$ave_sentiment >= 0, "non negative", "negative")

df_NLP_long <- df_NLP %>%
  pivot_longer(cols = starts_with(c("feelings_")), names_to = "feeling", values_to = "score")%>%
  mutate(feeling = gsub("feelings_", "", feeling)) %>%
  group_by(feeling, sentiment) %>%
  summarise(avg_score = mean(score, na.rm = TRUE))

## `summarise()` has grouped output by 'feeling'. You can override using the
## `.groups` argument.

ggplot(df_NLP_long, aes(x = feeling, y = avg_score, fill = sentiment)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  geom_text(aes(label = sprintf("%.2f", avg_score), y = ifelse(avg_score >= 0, avg_score + 0.5, avg_score - 0.5)), 
            position = position_dodge(width = 0.8), vjust = 0.3, size = 3) +
  labs(title = "Average Sentiment Score by Scenario",
       x = "Scenario",
       y = "Average Feelings") +
  scale_fill_manual(values = c("non negative" = "#FFBF00", "negative" = "dark red")) +
  theme_minimal() +
  theme(legend.position = "top",
        panel.border = element_rect(color = "black", fill = NA, size = 1),
        panel.grid.major.y = element_line(color = "gray", size = 0.5),
        axis.line = element_line(color = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1))