Install the necessary packages:

library(readr)
## Warning: package 'readr' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(knitr)
library(broom)
require(tibble)

Question 1

Identify as Optimist or Pessimist, create new variables and report the outcomes

Write-up and interpretation:

As the outcome below, ATE is 0.06145983, which represents the difference in the average mobility score between those who were exposed to rags-to-riches TV shows and those who were not (control group). Hence, in this context means ATE indicates the impact of watching rags-to-riches TV shows on respondents’ beliefs in economic mobility. A positive ATE will increase the belief in economic mobility, while a negative ATE will decrease in this belief.

# Load the dataset
rags <- read_csv("https://bit.ly/3GYRblb")
## Rows: 763 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): mobility, condition2, rep, dem, optimism_index, sjs_index
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Subset 'rags' to include respondents who identify as Optimist or Pessimist
rags <- rags %>%
  filter(optimism_index >= 1 & optimism_index <= 5) %>%
  mutate(
    treatment = ifelse(condition2 == 1, "Rags to Riches", "Control"),
    optimist = ifelse(optimism_index >= 3, "Optimist", "Pessimist")
  )
       
# Create "mobility_diff" to calculate the difference in means
mobility_diff <- rags %>%
  group_by(treatment) %>%
  summarize(mean_mobility = mean(mobility, na.rm = TRUE)) %>%
  pivot_wider(names_from = treatment, values_from = mean_mobility) %>%
  mutate(ATE = `Rags to Riches` - Control); mobility_diff
## # A tibble: 1 × 3
##   Control `Rags to Riches`    ATE
##     <dbl>            <dbl>  <dbl>
## 1   0.527            0.589 0.0615
# Estimated ATE in the columns
cat("The ATE value of mobility_diff is", mobility_diff$ATE)
## The ATE value of mobility_diff is 0.06145983

Question 2

Generate 1,000 bootstrap replications of the estimated ATE

Discuss if the CI contains zero? What does that mean?

The density histogram below shows the 95% confidence interval for the ATE is 0.03633351 to 0.08619964. To check the CI contains zero or not, by using a code of ifelse(0 >= ate_ci_95$lower_ci & 0 <= ate_ci_95$upper_ci, “Yes”, “No”), the result indicates “2.5% No”. It means the zero is NOT within the confidence interval, which indicates a statistically significant difference in perceptions of economic mobility between the two groups. But if the confidence interval contains zero, it means there is no statistically significant difference in perceptions of economic mobility between those who watched rags-to-riches TV shows and those who did not.

# Follow from the assignment instruction:
library(infer)
## Warning: package 'infer' was built under R version 4.2.3
set.seed(2023)

# Generate 1,000 bootstrap replications of the ATE using rep_slice_sample()
ate_boots <- rags %>%
  rep_slice_sample(prop = 1, replace = TRUE, reps = 1000) %>%
  group_by(replicate, treatment) %>%
  summarize(mean_mobility = mean(mobility, .groups="drop")) %>%
              pivot_wider(
                names_from = treatment, values_from = mean_mobility) %>%
  mutate(ATE=`Rags to Riches`-`Control`); ate_boots
## `summarise()` has grouped output by 'replicate'. You can override using the
## `.groups` argument.
## # A tibble: 1,000 × 4
## # Groups:   replicate [1,000]
##    replicate Control `Rags to Riches`    ATE
##        <int>   <dbl>            <dbl>  <dbl>
##  1         1   0.535            0.584 0.0497
##  2         2   0.530            0.577 0.0477
##  3         3   0.524            0.610 0.0861
##  4         4   0.525            0.596 0.0708
##  5         5   0.524            0.585 0.0615
##  6         6   0.530            0.596 0.0652
##  7         7   0.524            0.591 0.0673
##  8         8   0.528            0.599 0.0708
##  9         9   0.526            0.588 0.0613
## 10        10   0.523            0.593 0.0699
## # ℹ 990 more rows
# Calculate a 95% confidence interval for the difference in means 
ate_ci_95 <- ate_boots %>%
  select(replicate, ATE) %>%
  get_confidence_interval(level=0.95,type="percentile"); ate_ci_95
## # A tibble: 1 × 2
##   lower_ci upper_ci
##      <dbl>    <dbl>
## 1   0.0363   0.0862
# Plot the bootstrap distribution using a density histogram
ate_boots %>% 
  ggplot(aes(x = ATE)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.005, fill = "aquamarine4", color = "black") +
  geom_vline(xintercept = unlist(ate_ci_95), linewidth = 1, color = "chocolate3") +
  labs(title = "Bootstrap Distribution of ATE",
       x = "ATE",
       y = "Density") +
  theme_minimal()

Question 3

Explain how to interpret 95% confidence intervals in terms of repeated sampling. Is it possible to produce a 100% confidence interval in this setting? If so, what is it and is it useful?

Interpret 95% confidence intervals:

  • In the theory, if taking many samples from the same population and computing a confidence interval for each sample, it would expect approximately 95% of those intervals to contain the true population parameter.
  • Give some simple example, if we’re estimating the average height of adult males in a certain city, and construct a 95% confidence interval for this parameter based on a sample of heights. It would expect that if we were to take 100 samples and compute confidence intervals for each, approximately 95 of those intervals would contain the true average height of adult males in the city.

Interpret 100% confidence intervals:

  • It’s possible to produce a 100% confidence interval which could guarantee the data contains with the true parameter value. However, it’s not a useful information for inference because an interval will be broad and span the entire range of possible values for the parameter, ranging from negative infinity to positive infinity. It doesn’t help to make the decisions or draw the conclusions about the population.
  • The useful confidence intervals must be with lower confidence levels like 95% or 90%, which are more commonly used to balance the trade-off between precision and coverage. These intervals provide a useful range of values within the true parameter lies based on the sample data.

Question 4

Compute the ATE for respondents identified as Optimists and Pessimists, and determine the interaction between these two ATEs

Reporting the effect and interpretation:

The results below effects the interaction between watching rags-to-riches TV shows and people’s optimism levels shows how the impact of these shows differs between Optimists and Pessimists. In this study, Optimists experienced an Average Treatment Effect (ATE) of 0.08700281, meaning they were affected 0.08700281 more positively by the shows compared to Pessimists, who had an ATE of -0.02403495. Therefore, the difference between these ATEs is 0.1110378. This indicates the influence of rags-to-riches TV shows on perceptions of economic mobility varies depending on whether individuals are Optimists or Pessimists. This insight highlights how individual beliefs shape the way media exposure influences views on social advancement.

# Calculate ATE for Optimists
ATE_Opt <- rags %>%
  filter(optimist == "Optimist") %>%
  group_by(treatment) %>%
  summarize(mean_opt = mean(mobility)) %>%
              pivot_wider(names_from = treatment, values_from = mean_opt) %>%
  mutate(ATEopt = `Rags to Riches`-`Control`); ATE_Opt
## # A tibble: 1 × 3
##   Control `Rags to Riches` ATEopt
##     <dbl>            <dbl>  <dbl>
## 1   0.538            0.625 0.0870
# Calculate ATE for Pessimists
ATE_Pes <- rags %>%
  filter(optimist == "Pessimist") %>%
  group_by(treatment) %>%
  summarize(mean_opt = mean(mobility)) %>%
  pivot_wider(names_from = treatment, values_from = mean_opt) %>%
   mutate(ATEpes=`Rags to Riches`-`Control`); ATE_Pes
## # A tibble: 1 × 3
##   Control `Rags to Riches`  ATEpes
##     <dbl>            <dbl>   <dbl>
## 1   0.501            0.477 -0.0240
# Determine the interaction between ATE for Optimists and Pessimists to see the differences
ATE_Diff <- ATE_Opt$ATEopt-ATE_Pes$ATEpes; ATE_Diff
## [1] 0.1110378
# Create ate_optimist tibble and report
ate_optimist <- tibble(
  ATE_Opt = ATE_Opt$ATEopt,
  ATE_Pes = ATE_Pes$ATEpes,
  ATE_Diff = ATE_Diff
); ate_optimist
## # A tibble: 1 × 3
##   ATE_Opt ATE_Pes ATE_Diff
##     <dbl>   <dbl>    <dbl>
## 1  0.0870 -0.0240    0.111

Question 5

Generate 1,000 boostrap replications of the difference in ATEs between Optimists and Pessimists from Question 4, and construct a 95% confidence interval for the difference between the ATE

Discuss if the CI contains zero? What does that mean?

As the plot below, it shows the difference in ATEs between Optimists and Pessimists of 1,000 rows does not contains zero, the range is between 0.056 to 0.170. To know the CI contains zero or not, also can check with ifelse(ate_optimist_ci_95$lower_ci <= 0 && ate_optimist_ci_95$upper_ci >= 0, “Yes”, “No”). The result is No meaning CI Not contains zero. Therefore, it implies that individuals’ optimism levels influence how they perceive and are influenced by such media narratives, highlighting the importance of considering individual differences in media effects research.

# Set the code according to the assignment description:
set.seed(2023)

# Use 'rep_slice_sample' to generate 1,000 boostrap replications of the difference in ATEs 

# Calculate Optimists first
ate_opt_boots <- rags %>%
  filter(optimist=="Optimist") %>%
  rep_slice_sample(prop=1,replace=TRUE,reps=1000) %>%
  group_by(replicate, treatment) %>%
  summarize(mean_opt=mean(mobility),.groups="drop") %>%
  pivot_wider(names_from = treatment, values_from = mean_opt) %>%
  mutate(ATEopt=`Rags to Riches`-`Control`); ate_opt_boots
## # A tibble: 1,000 × 4
##    replicate Control `Rags to Riches` ATEopt
##        <int>   <dbl>            <dbl>  <dbl>
##  1         1   0.538            0.621 0.0831
##  2         2   0.537            0.616 0.0796
##  3         3   0.529            0.637 0.108 
##  4         4   0.533            0.623 0.0899
##  5         5   0.535            0.617 0.0828
##  6         6   0.539            0.623 0.0846
##  7         7   0.539            0.610 0.0708
##  8         8   0.546            0.608 0.0625
##  9         9   0.547            0.639 0.0920
## 10        10   0.538            0.624 0.0858
## # ℹ 990 more rows
# Then, calculate Pessimists 
ate_pes_boots <- rags %>%
  filter(optimist=="Pessimist") %>%
  rep_slice_sample(prop=1,replace=TRUE,reps=1000) %>%
  group_by(replicate, treatment) %>%
  summarize(mean_opt=mean(mobility),.groups="drop") %>%
  pivot_wider(names_from = treatment, values_from = mean_opt) %>%
  mutate(ATEpes=`Rags to Riches`-`Control`); ate_pes_boots
## # A tibble: 1,000 × 4
##    replicate Control `Rags to Riches`   ATEpes
##        <int>   <dbl>            <dbl>    <dbl>
##  1         1   0.492            0.545  0.0533 
##  2         2   0.491            0.445 -0.0456 
##  3         3   0.511            0.472 -0.0389 
##  4         4   0.531            0.504 -0.0268 
##  5         5   0.499            0.451 -0.0479 
##  6         6   0.514            0.454 -0.0595 
##  7         7   0.507            0.471 -0.0355 
##  8         8   0.492            0.498  0.00678
##  9         9   0.503            0.509  0.00532
## 10        10   0.511            0.490 -0.0201 
## # ℹ 990 more rows
# Calculate the difference boostrap between Optimists and Pessimists
ate_diff_boots <- ate_opt_boots$ATEopt - ate_pes_boots$ATEpes

# See the 1,000 boostrap replications of the difference
ate_optimist_boots <- cbind(
  ate_opt_boots$replicate,
  ate_opt_boots$ATEopt,
  ate_pes_boots$ATEpes,
  ate_diff_boots)

# Create 1,000 boostrap replication 
ate_optimist_boots <- ate_optimist_boots %>%
  as_tibble() %>%
  rename(Replicate=V1,
         ATE_Opt=V2,
         ATE_Pes=V3,
         ATE_Diff=ate_diff_boots)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Construct 95% confidence interval for the difference between the ATE 
ate_optimist_ci_95 <-
  ate_optimist_boots %>%
  select(Replicate, ATE_Diff) %>%
  get_confidence_interval(level=0.95,type="percentile"); ate_optimist_ci_95 
## # A tibble: 1 × 2
##   lower_ci upper_ci
##      <dbl>    <dbl>
## 1   0.0560    0.170
# Plot the bootstrap distribution
ate_optimist_boots %>%
  ggplot(aes(x=ATE_Diff)) +
  geom_histogram(aes(y=after_stat(density)), binwidth=0.01, fill = "darkolivegreen1", color="black") +
  geom_vline(xintercept = unlist(ate_optimist_ci_95), linewidth = 1, color = "red") +
  labs(title = "Bootstrap Distribution of Difference in ATEs",
       x = "Difference in ATEs",
       y = "Density") +
  theme_minimal()