library(readr)
## Warning: package 'readr' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(knitr)
library(broom)
require(tibble)
Write-up and interpretation:
As the outcome below, ATE is 0.06145983, which represents the difference in the average mobility score between those who were exposed to rags-to-riches TV shows and those who were not (control group). Hence, in this context means ATE indicates the impact of watching rags-to-riches TV shows on respondents’ beliefs in economic mobility. A positive ATE will increase the belief in economic mobility, while a negative ATE will decrease in this belief.
# Load the dataset
rags <- read_csv("https://bit.ly/3GYRblb")
## Rows: 763 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): mobility, condition2, rep, dem, optimism_index, sjs_index
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Subset 'rags' to include respondents who identify as Optimist or Pessimist
rags <- rags %>%
filter(optimism_index >= 1 & optimism_index <= 5) %>%
mutate(
treatment = ifelse(condition2 == 1, "Rags to Riches", "Control"),
optimist = ifelse(optimism_index >= 3, "Optimist", "Pessimist")
)
# Create "mobility_diff" to calculate the difference in means
mobility_diff <- rags %>%
group_by(treatment) %>%
summarize(mean_mobility = mean(mobility, na.rm = TRUE)) %>%
pivot_wider(names_from = treatment, values_from = mean_mobility) %>%
mutate(ATE = `Rags to Riches` - Control); mobility_diff
## # A tibble: 1 × 3
## Control `Rags to Riches` ATE
## <dbl> <dbl> <dbl>
## 1 0.527 0.589 0.0615
# Estimated ATE in the columns
cat("The ATE value of mobility_diff is", mobility_diff$ATE)
## The ATE value of mobility_diff is 0.06145983
Discuss if the CI contains zero? What does that mean?
The density histogram below shows the 95% confidence interval for the
ATE is 0.03633351 to 0.08619964. To check the CI contains zero or not,
by using a code of ifelse(0 >=
ate_ci_95$lower_ci & 0 <=
ate_ci_95$upper_ci, “Yes”, “No”), the result
indicates “2.5% No”. It means the zero is NOT within
the confidence interval, which indicates a statistically significant
difference in perceptions of economic mobility between the two groups.
But if the confidence interval contains zero, it means there is no
statistically significant difference in perceptions of economic mobility
between those who watched rags-to-riches TV shows and those who did
not.
# Follow from the assignment instruction:
library(infer)
## Warning: package 'infer' was built under R version 4.2.3
set.seed(2023)
# Generate 1,000 bootstrap replications of the ATE using rep_slice_sample()
ate_boots <- rags %>%
rep_slice_sample(prop = 1, replace = TRUE, reps = 1000) %>%
group_by(replicate, treatment) %>%
summarize(mean_mobility = mean(mobility, .groups="drop")) %>%
pivot_wider(
names_from = treatment, values_from = mean_mobility) %>%
mutate(ATE=`Rags to Riches`-`Control`); ate_boots
## `summarise()` has grouped output by 'replicate'. You can override using the
## `.groups` argument.
## # A tibble: 1,000 × 4
## # Groups: replicate [1,000]
## replicate Control `Rags to Riches` ATE
## <int> <dbl> <dbl> <dbl>
## 1 1 0.535 0.584 0.0497
## 2 2 0.530 0.577 0.0477
## 3 3 0.524 0.610 0.0861
## 4 4 0.525 0.596 0.0708
## 5 5 0.524 0.585 0.0615
## 6 6 0.530 0.596 0.0652
## 7 7 0.524 0.591 0.0673
## 8 8 0.528 0.599 0.0708
## 9 9 0.526 0.588 0.0613
## 10 10 0.523 0.593 0.0699
## # ℹ 990 more rows
# Calculate a 95% confidence interval for the difference in means
ate_ci_95 <- ate_boots %>%
select(replicate, ATE) %>%
get_confidence_interval(level=0.95,type="percentile"); ate_ci_95
## # A tibble: 1 × 2
## lower_ci upper_ci
## <dbl> <dbl>
## 1 0.0363 0.0862
# Plot the bootstrap distribution using a density histogram
ate_boots %>%
ggplot(aes(x = ATE)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.005, fill = "aquamarine4", color = "black") +
geom_vline(xintercept = unlist(ate_ci_95), linewidth = 1, color = "chocolate3") +
labs(title = "Bootstrap Distribution of ATE",
x = "ATE",
y = "Density") +
theme_minimal()
Interpret 95% confidence intervals:
Interpret 100% confidence intervals:
Reporting the effect and interpretation:
The results below effects the interaction between watching rags-to-riches TV shows and people’s optimism levels shows how the impact of these shows differs between Optimists and Pessimists. In this study, Optimists experienced an Average Treatment Effect (ATE) of 0.08700281, meaning they were affected 0.08700281 more positively by the shows compared to Pessimists, who had an ATE of -0.02403495. Therefore, the difference between these ATEs is 0.1110378. This indicates the influence of rags-to-riches TV shows on perceptions of economic mobility varies depending on whether individuals are Optimists or Pessimists. This insight highlights how individual beliefs shape the way media exposure influences views on social advancement.
# Calculate ATE for Optimists
ATE_Opt <- rags %>%
filter(optimist == "Optimist") %>%
group_by(treatment) %>%
summarize(mean_opt = mean(mobility)) %>%
pivot_wider(names_from = treatment, values_from = mean_opt) %>%
mutate(ATEopt = `Rags to Riches`-`Control`); ATE_Opt
## # A tibble: 1 × 3
## Control `Rags to Riches` ATEopt
## <dbl> <dbl> <dbl>
## 1 0.538 0.625 0.0870
# Calculate ATE for Pessimists
ATE_Pes <- rags %>%
filter(optimist == "Pessimist") %>%
group_by(treatment) %>%
summarize(mean_opt = mean(mobility)) %>%
pivot_wider(names_from = treatment, values_from = mean_opt) %>%
mutate(ATEpes=`Rags to Riches`-`Control`); ATE_Pes
## # A tibble: 1 × 3
## Control `Rags to Riches` ATEpes
## <dbl> <dbl> <dbl>
## 1 0.501 0.477 -0.0240
# Determine the interaction between ATE for Optimists and Pessimists to see the differences
ATE_Diff <- ATE_Opt$ATEopt-ATE_Pes$ATEpes; ATE_Diff
## [1] 0.1110378
# Create ate_optimist tibble and report
ate_optimist <- tibble(
ATE_Opt = ATE_Opt$ATEopt,
ATE_Pes = ATE_Pes$ATEpes,
ATE_Diff = ATE_Diff
); ate_optimist
## # A tibble: 1 × 3
## ATE_Opt ATE_Pes ATE_Diff
## <dbl> <dbl> <dbl>
## 1 0.0870 -0.0240 0.111
Discuss if the CI contains zero? What does that mean?
As the plot below, it shows the difference in ATEs between Optimists
and Pessimists of 1,000 rows does not contains zero, the range is
between 0.056 to 0.170. To know the CI contains zero or not, also can
check with ifelse(ate_optimist_ci_95$lower_ci <=
0 && ate_optimist_ci_95$upper_ci >= 0, “Yes”,
“No”). The result is No meaning CI Not
contains zero. Therefore, it implies that individuals’ optimism
levels influence how they perceive and are influenced by such media
narratives, highlighting the importance of considering individual
differences in media effects research.
# Set the code according to the assignment description:
set.seed(2023)
# Use 'rep_slice_sample' to generate 1,000 boostrap replications of the difference in ATEs
# Calculate Optimists first
ate_opt_boots <- rags %>%
filter(optimist=="Optimist") %>%
rep_slice_sample(prop=1,replace=TRUE,reps=1000) %>%
group_by(replicate, treatment) %>%
summarize(mean_opt=mean(mobility),.groups="drop") %>%
pivot_wider(names_from = treatment, values_from = mean_opt) %>%
mutate(ATEopt=`Rags to Riches`-`Control`); ate_opt_boots
## # A tibble: 1,000 × 4
## replicate Control `Rags to Riches` ATEopt
## <int> <dbl> <dbl> <dbl>
## 1 1 0.538 0.621 0.0831
## 2 2 0.537 0.616 0.0796
## 3 3 0.529 0.637 0.108
## 4 4 0.533 0.623 0.0899
## 5 5 0.535 0.617 0.0828
## 6 6 0.539 0.623 0.0846
## 7 7 0.539 0.610 0.0708
## 8 8 0.546 0.608 0.0625
## 9 9 0.547 0.639 0.0920
## 10 10 0.538 0.624 0.0858
## # ℹ 990 more rows
# Then, calculate Pessimists
ate_pes_boots <- rags %>%
filter(optimist=="Pessimist") %>%
rep_slice_sample(prop=1,replace=TRUE,reps=1000) %>%
group_by(replicate, treatment) %>%
summarize(mean_opt=mean(mobility),.groups="drop") %>%
pivot_wider(names_from = treatment, values_from = mean_opt) %>%
mutate(ATEpes=`Rags to Riches`-`Control`); ate_pes_boots
## # A tibble: 1,000 × 4
## replicate Control `Rags to Riches` ATEpes
## <int> <dbl> <dbl> <dbl>
## 1 1 0.492 0.545 0.0533
## 2 2 0.491 0.445 -0.0456
## 3 3 0.511 0.472 -0.0389
## 4 4 0.531 0.504 -0.0268
## 5 5 0.499 0.451 -0.0479
## 6 6 0.514 0.454 -0.0595
## 7 7 0.507 0.471 -0.0355
## 8 8 0.492 0.498 0.00678
## 9 9 0.503 0.509 0.00532
## 10 10 0.511 0.490 -0.0201
## # ℹ 990 more rows
# Calculate the difference boostrap between Optimists and Pessimists
ate_diff_boots <- ate_opt_boots$ATEopt - ate_pes_boots$ATEpes
# See the 1,000 boostrap replications of the difference
ate_optimist_boots <- cbind(
ate_opt_boots$replicate,
ate_opt_boots$ATEopt,
ate_pes_boots$ATEpes,
ate_diff_boots)
# Create 1,000 boostrap replication
ate_optimist_boots <- ate_optimist_boots %>%
as_tibble() %>%
rename(Replicate=V1,
ATE_Opt=V2,
ATE_Pes=V3,
ATE_Diff=ate_diff_boots)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Construct 95% confidence interval for the difference between the ATE
ate_optimist_ci_95 <-
ate_optimist_boots %>%
select(Replicate, ATE_Diff) %>%
get_confidence_interval(level=0.95,type="percentile"); ate_optimist_ci_95
## # A tibble: 1 × 2
## lower_ci upper_ci
## <dbl> <dbl>
## 1 0.0560 0.170
# Plot the bootstrap distribution
ate_optimist_boots %>%
ggplot(aes(x=ATE_Diff)) +
geom_histogram(aes(y=after_stat(density)), binwidth=0.01, fill = "darkolivegreen1", color="black") +
geom_vline(xintercept = unlist(ate_optimist_ci_95), linewidth = 1, color = "red") +
labs(title = "Bootstrap Distribution of Difference in ATEs",
x = "Difference in ATEs",
y = "Density") +
theme_minimal()