Setup

library(tidyverse)
library(magrittr)
library(DT)
library(knitr)
library(GGally)
# Read the survey CSV and discard its `null` column.
study <- select(
  read_csv("G:/My Drive/homework/Sara A/study100200_51+2.csv"),
  -null
)

# Interactive preview of the unfiltered data, five rows per page.
datatable(study, options = list(pageLength = 5))

# Keep only conditions 3 and 4 and expose them as the labelled factor `cdd`
# (factor levels sort ascending, so 3 -> "control" and 4 -> "treatment"),
# then drop the original `condition` column.
study <- study %>%
  filter(condition %in% c(3, 4)) %>%
  mutate(cdd = factor(condition, labels = c("control", "treatment"))) %>%
  select(-condition)

# Per-column summary of the filtered data.
summary(study)
##   deportlopez      howfairly        deserved     howstrong_deport
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000   
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   
##  Median :2.000   Median :2.000   Median :1.000   Median :2.000   
##  Mean   :2.477   Mean   :2.677   Mean   :2.065   Mean   :2.877   
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:4.000   
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000   
##  NA's   :1       NA's   :1       NA's   :2       NA's   :2       
##  blame_choose1      blameMJ          blameUS          blameES      
##  Min.   :1.000   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:2.000   1st Qu.:  0.00   1st Qu.: 25.00   1st Qu.: 20.00  
##  Median :2.000   Median : 20.00   Median : 40.00   Median : 35.00  
##  Mean   :2.237   Mean   : 21.57   Mean   : 41.66   Mean   : 36.77  
##  3rd Qu.:3.000   3rd Qu.: 30.00   3rd Qu.: 60.00   3rd Qu.: 50.00  
##  Max.   :3.000   Max.   :100.00   Max.   :100.00   Max.   :100.00  
##  NA's   :4       NA's   :1        NA's   :1        NA's   :1       
##      study            sex              pid7              cdd    
##  Min.   :100.0   Min.   :0.0000   Min.   :1.000   control  :80  
##  1st Qu.:100.0   1st Qu.:0.0000   1st Qu.:1.000   treatment:76  
##  Median :200.0   Median :1.0000   Median :2.000                 
##  Mean   :155.8   Mean   :0.6623   Mean   :2.449                 
##  3rd Qu.:200.0   3rd Qu.:1.0000   3rd Qu.:3.000                 
##  Max.   :200.0   Max.   :1.0000   Max.   :7.000                 
##                  NA's   :2
# Same summary, but with every column except 6:8 converted to a factor so
# that responses are tabulated as counts; columns 6:8 stay numeric.
summary(
  mutate(study, across(.cols = -c(6:8), .fns = as_factor))
)
##   deportlopez   howfairly     deserved  howstrong_deport blame_choose1
##  1      :65   1      :59   1      :92   1      :51       1   :24      
##  2      :32   2      :32   2      :17   2      :34       2   :68      
##  3      :17   4      :19   4      :15   4      :19       3   :60      
##  4      :15   3      :17   3      :13   3      :18       NA's: 4      
##  5      :15   6      :13   5      :10   7      :13                    
##  (Other):11   (Other):15   (Other): 7   (Other):19                    
##  NA's   : 1   NA's   : 1   NA's   : 2   NA's   : 2                    
##     blameMJ          blameUS          blameES       study      sex      pid7  
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   100:69   0   : 52   1:50  
##  1st Qu.:  0.00   1st Qu.: 25.00   1st Qu.: 20.00   200:87   1   :102   2:42  
##  Median : 20.00   Median : 40.00   Median : 35.00            NA's:  2   3:30  
##  Mean   : 21.57   Mean   : 41.66   Mean   : 36.77                       4:21  
##  3rd Qu.: 30.00   3rd Qu.: 60.00   3rd Qu.: 50.00                       5: 7  
##  Max.   :100.00   Max.   :100.00   Max.   :100.00                       6: 3  
##  NA's   :1        NA's   :1        NA's   :1                            7: 3  
##         cdd    
##  control  :80  
##  treatment:76  
##                
##                
##                
##                
## 
# study %>%
#  mutate(across(.cols = -c(6:8), as_factor)) %>%
#  ggpairs()

Problem 01

Horizontal

# Summarise every non-grouping column of `df` by its mean and standard
# deviation, rounded to two decimals.
#
# Args:
#   df: A data frame or grouped tibble. Grouping variables define the rows
#       of the result; the remaining columns are expected to be numeric.
# Returns:
#   A tibble with one row per group and, for each summarised column, a
#   `<col>.Mean` and a `<col>.StdDev` column. Missing values are ignored
#   (`na.rm = TRUE`).
question1 <-
  function(df) {
    df %>%
      summarize(across(
        .cols = everything(),
        .fns = list(
          Mean = ~ mean(.x, na.rm = TRUE),
          StdDev = ~ sd(.x, na.rm = TRUE)
        ),
        .names = "{.col}.{.fn}"
      )) %>%
      # Round only the numeric summaries. Selecting by type (instead of
      # excluding a hard-coded `cdd`) keeps the function usable on inputs
      # with other or no grouping columns, and the explicit lambda avoids
      # the deprecated practice of passing extra arguments through across().
      mutate(across(.cols = where(is.numeric), .fns = ~ round(.x, digits = 2)))
  }

# Wide ("horizontal") Problem 01 table: mean and standard deviation of the
# first four columns, one row per condition. `select()` runs on the grouped
# data, so `cdd` is re-added automatically as the grouping variable.
# (A drop_na() step between select() and question1() is left disabled.)
study.table1horz <- study %>%
  group_by(cdd) %>%
  select(1:4) %>%
  question1()
## Adding missing grouping variables: `cdd`
# Show the wide table both as an interactive widget and as a static table.
datatable(study.table1horz)
kable(study.table1horz)
cdd deportlopez.Mean deportlopez.StdDev howfairly.Mean howfairly.StdDev deserved.Mean deserved.StdDev howstrong_deport.Mean howstrong_deport.StdDev
control 2.19 1.67 2.41 1.75 2.00 1.52 3.01 1.98
treatment 2.78 1.69 2.96 1.89 2.13 1.62 2.74 1.91

Vertical

# Long ("vertical") Problem 01 table: stack the first four columns into
# Survey/Response pairs, then summarise each Survey x condition cell with
# question1(). The value column is spelled "Response" so the resulting
# headers read `Response.Mean` / `Response.StdDev`.
study.table1vert <- study %>%
  select(1:4, cdd) %>%
  pivot_longer(
    -cdd,
    names_to = "Survey",
    values_to = "Response"
  ) %>%
  group_by(Survey, cdd) %>%
  question1()
## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.
# Show the long table both as an interactive widget and as a static table.
datatable(study.table1vert)
kable(study.table1vert)
Survey cdd Repsonse.Mean Repsonse.StdDev
deportlopez control 2.19 1.67
deportlopez treatment 2.78 1.69
deserved control 2.00 1.52
deserved treatment 2.13 1.62
howfairly control 2.41 1.75
howfairly treatment 2.96 1.89
howstrong_deport control 3.01 1.98
howstrong_deport treatment 2.74 1.91

EDA

# Pairwise plots of columns 1:4, coloured by condition.
# First pass: every selected column (including `cdd`) treated as a factor.
ggpairs(
  mutate(
    select(study, 1:4, cdd),
    across(.cols = everything(), .fns = as_factor)
  ),
  mapping = ggplot2::aes(colour = cdd)
)

# Second pass: the same columns left in their original types.
ggpairs(
  select(study, 1:4, cdd),
  mapping = ggplot2::aes(colour = cdd)
)

Problem 02

Horizontal

# Summarise every non-grouping column of `df` by location and spread:
# mean, standard deviation, median, first and third quartiles, and IQR,
# each rounded to two decimals.
#
# Args:
#   df: A data frame or grouped tibble. Grouping variables define the rows
#       of the result; the remaining columns are expected to be numeric.
# Returns:
#   A tibble with one row per group and, for each summarised column, the
#   columns `<col>.Mean`, `<col>.StDev`, `<col>.Median`, `<col>.Q1`,
#   `<col>.Q3` and `<col>.IQR`. Missing values are ignored (`na.rm = TRUE`).
question2 <-
  function(df) {
    df %>%
      summarize(across(
        .cols = everything(),
        .fns = list(
          Mean = ~ mean(.x, na.rm = TRUE),
          StDev = ~ sd(.x, na.rm = TRUE),
          Median = ~ median(.x, na.rm = TRUE),
          Q1 = ~ quantile(.x, probs = 0.25, na.rm = TRUE),
          # Third quartile is the 75th percentile, so Q3 - Q1 equals IQR().
          Q3 = ~ quantile(.x, probs = 0.75, na.rm = TRUE),
          IQR = ~ IQR(.x, na.rm = TRUE)
        ),
        .names = "{.col}.{.fn}"
      )) %>%
      # Round only the numeric summaries. Selecting by type (instead of
      # excluding a hard-coded `cdd`) keeps the function usable on inputs
      # with other or no grouping columns, and the explicit lambda avoids
      # the deprecated practice of passing extra arguments through across().
      mutate(across(.cols = where(is.numeric), .fns = ~ round(.x, digits = 2)))
  }

# Wide ("horizontal") Problem 02 table: question2() statistics for columns
# 6:8, one row per condition. `select()` runs on the grouped data, so `cdd`
# is re-added automatically as the grouping variable.
study.table2horz <- study %>%
  group_by(cdd) %>%
  select(6:8) %>%
  question2()
## Adding missing grouping variables: `cdd`
# Show the wide table both as an interactive widget and as a static table.
datatable(study.table2horz)
kable(study.table2horz)
cdd blameMJ.Mean blameMJ.StDev blameMJ.Median blameMJ.Q1 blameMJ.Q3 blameMJ.IQR blameUS.Mean blameUS.StDev blameUS.Median blameUS.Q1 blameUS.Q3 blameUS.IQR blameES.Mean blameES.StDev blameES.Median blameES.Q1 blameES.Q3 blameES.IQR
control 14.42 20.04 10 0 20.0 20.00 43.16 24.44 40 30.00 50 24.00 42.42 20.45 45 30.00 60 30.00
treatment 29.00 22.92 25 10 34.5 26.25 40.11 24.30 40 23.75 60 36.25 30.89 18.38 30 18.75 40 21.25

Vertical

# Long ("vertical") Problem 02 table: stack columns 6:8 into
# Survey/Response pairs, then summarise each Survey x condition cell with
# question2(). The value column is spelled "Response" so the resulting
# headers read `Response.Mean`, `Response.StDev`, and so on.
study.table2vert <- study %>%
  select(6:8, cdd) %>%
  pivot_longer(
    -cdd,
    names_to = "Survey",
    values_to = "Response"
  ) %>%
  group_by(Survey, cdd) %>%
  question2()
## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.
# Show the long table both as an interactive widget and as a static table.
datatable(study.table2vert)
kable(study.table2vert)
Survey cdd Repsonse.Mean Repsonse.StDev Repsonse.Median Repsonse.Q1 Repsonse.Q3 Repsonse.IQR
blameES control 42.42 20.45 45 30.00 60.0 30.00
blameES treatment 30.89 18.38 30 18.75 40.0 21.25
blameMJ control 14.42 20.04 10 0.00 20.0 20.00
blameMJ treatment 29.00 22.92 25 10.00 34.5 26.25
blameUS control 43.16 24.44 40 30.00 50.0 24.00
blameUS treatment 40.11 24.30 40 23.75 60.0 36.25

EDA

# Pairwise plots of columns 6:8, coloured by condition.
ggpairs(
  select(study, 6:8, cdd),
  mapping = ggplot2::aes(colour = cdd)
)

Problem 04

# Dodged bar chart of `deportlopez` responses, one bar colour per condition.
study %>%
  # Only the plotted variable has to be complete; a bare drop_na() would
  # also discard rows that are missing in unrelated columns.
  drop_na(deportlopez) %>%
  # Label the 1-7 codes through an explicit factor so the x axis follows
  # the scale order (1 = Strongly Oppose ... 7 = Strongly Support) rather
  # than the alphabetical order a character column would get.
  mutate(deportlopez = factor(
    deportlopez,
    levels = 1:7,
    labels = c(
      "Strongly Oppose",
      "Moderately Oppose",
      "Slightly Oppose",
      "Neither",
      "Slightly Support",
      "Moderately Support",
      "Strongly Support"
    )
  )) %>%
  ggplot(aes(deportlopez, fill = cdd)) +
  geom_bar(position = "dodge") +
  ggtitle("Do you support or oppose the decision to deport Miguel Lopez?") +
  # Long category labels: rotate them and hide the redundant axis title.
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.25, hjust = 1)
  )

Problem 06

# Box plots of `blameMJ`, one box per condition; the y-axis tick labels
# are hidden.
ggplot(study, aes(x = blameMJ, fill = cdd)) +
  geom_boxplot() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  theme(axis.text.y = element_blank())
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Problem 08

By Condition

# Histograms of `blameMJ`, one panel (and fill colour) per condition.
ggplot(study, aes(x = blameMJ, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  facet_wrap(~ cdd)

Extra

# Histograms of `blameMJ` in a condition (rows) x sex (columns) grid,
# after removing rows where `sex` is missing.
ggplot(drop_na(study, sex), aes(x = blameMJ)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  facet_grid(cdd ~ sex, labeller = "label_both")

Problem 10

By Condition

# Histograms of `blameUS`, one panel (and fill colour) per condition.
ggplot(study, aes(x = blameUS, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to U.S. Immigration?",
    x = "Percent"
  ) +
  facet_wrap(~ cdd)

Extra

# Histograms of `blameUS` in a condition (rows) x sex (columns) grid,
# after removing rows where `sex` is missing.
ggplot(drop_na(study, sex), aes(x = blameUS)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to U.S. Immigration?",
    x = "Percent"
  ) +
  facet_grid(cdd ~ sex, labeller = "label_both")