Setup

library(tidyverse)
library(magrittr)
library(DT)
library(knitr)
library(GGally)

# Load the raw survey export and discard its `null` column.
study <- select(
  read_csv("G:/My Drive/homework/Sara A/study100200_51+2.csv"),
  -null
)

# Interactive preview of the unfiltered data, five rows per page.
datatable(study, options = list(pageLength = 5))

# Keep only conditions 3 and 4 and store them as the factor `cdd`.
# factor() assigns labels in sorted order of the remaining values, so
# 3 becomes "control" and 4 becomes "treatment". The numeric `condition`
# column is dropped once `cdd` exists.
study <- study %>%
  filter(condition %in% c(3, 4)) %>%
  mutate(cdd = factor(condition, labels = c("control", "treatment"))) %>%
  select(-condition)

# Numeric summary of every remaining column.
summary(study)

##   deportlopez      howfairly        deserved     howstrong_deport
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000   
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   
##  Median :2.000   Median :2.000   Median :1.000   Median :2.000   
##  Mean   :2.477   Mean   :2.677   Mean   :2.065   Mean   :2.877   
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:4.000   
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000   
##  NA's   :1       NA's   :1       NA's   :2       NA's   :2       
##  blame_choose1      blameMJ          blameUS          blameES      
##  Min.   :1.000   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:2.000   1st Qu.:  0.00   1st Qu.: 25.00   1st Qu.: 20.00  
##  Median :2.000   Median : 20.00   Median : 40.00   Median : 35.00  
##  Mean   :2.237   Mean   : 21.57   Mean   : 41.66   Mean   : 36.77  
##  3rd Qu.:3.000   3rd Qu.: 30.00   3rd Qu.: 60.00   3rd Qu.: 50.00  
##  Max.   :3.000   Max.   :100.00   Max.   :100.00   Max.   :100.00  
##  NA's   :4       NA's   :1        NA's   :1        NA's   :1       
##      study            sex              pid7              cdd    
##  Min.   :100.0   Min.   :0.0000   Min.   :1.000   control  :80  
##  1st Qu.:100.0   1st Qu.:0.0000   1st Qu.:1.000   treatment:76  
##  Median :200.0   Median :1.0000   Median :2.000                 
##  Mean   :155.8   Mean   :0.6623   Mean   :2.449                 
##  3rd Qu.:200.0   3rd Qu.:1.0000   3rd Qu.:3.000                 
##  Max.   :200.0   Max.   :1.0000   Max.   :7.000                 
##                  NA's   :2

# Summary with every column except positions 6-8 converted to factors, so
# those columns are reported as level counts. Positions 6-8 (blameMJ,
# blameUS, blameES in the printed summary) stay numeric.
summary(
  mutate(study, across(.cols = -c(6:8), .fns = as_factor))
)

##   deportlopez   howfairly     deserved  howstrong_deport blame_choose1
##  1      :65   1      :59   1      :92   1      :51       1   :24      
##  2      :32   2      :32   2      :17   2      :34       2   :68      
##  3      :17   4      :19   4      :15   4      :19       3   :60      
##  4      :15   3      :17   3      :13   3      :18       NA's: 4      
##  5      :15   6      :13   5      :10   7      :13                    
##  (Other):11   (Other):15   (Other): 7   (Other):19                    
##  NA's   : 1   NA's   : 1   NA's   : 2   NA's   : 2                    
##     blameMJ          blameUS          blameES       study      sex      pid7  
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   100:69   0   : 52   1:50  
##  1st Qu.:  0.00   1st Qu.: 25.00   1st Qu.: 20.00   200:87   1   :102   2:42  
##  Median : 20.00   Median : 40.00   Median : 35.00            NA's:  2   3:30  
##  Mean   : 21.57   Mean   : 41.66   Mean   : 36.77                       4:21  
##  3rd Qu.: 30.00   3rd Qu.: 60.00   3rd Qu.: 50.00                       5: 7  
##  Max.   :100.00   Max.   :100.00   Max.   :100.00                       6: 3  
##  NA's   :1        NA's   :1        NA's   :1                            7: 3  
##         cdd    
##  control  :80  
##  treatment:76  
##                
##                
##                
##                
##

# study %>%
#  mutate(across(.cols = -c(6:8), as_factor)) %>%
#  ggpairs()

Problem 01

Horizontal

# Summarise every non-grouping column of `df` by its mean and standard
# deviation, ignoring missing values, and round the results to two decimals.
#
# df:      a data frame, normally grouped with group_by(); grouping columns
#          are kept as identifiers and are not summarised.
# returns: one row per group, with columns named "<column>.Mean" and
#          "<column>.StdDev".
question1 <-
  function(df){
    df %>%
      summarize(across(.cols = everything(),
                   .fns = list(Mean = ~ mean(.x, na.rm = TRUE),
                               StdDev = ~ sd(.x, na.rm = TRUE)
                               ),
                   .names = "{.col}.{.fn}"
                   )
            ) %>%
      # Round only the numeric summaries. Selecting by type instead of by the
      # name `cdd` lets the helper work for any grouping column, and the
      # lambda avoids passing round()'s arguments through across(...).
      mutate(across(.cols = where(is.numeric),
                    .fns = ~ round(.x, digits = 2)
                    )
             )
  }

# Horizontal table 1: one row per condition with the mean and standard
# deviation of columns 1-4. Grouping by `cdd` before the positional select
# keeps `cdd` in the data (dplyr re-adds it and prints a message).
study.table1horz <- question1(select(group_by(study, cdd), 1:4))

## Adding missing grouping variables: `cdd`

# Show the horizontal table as an interactive widget ...
datatable(study.table1horz)

# ... and as a static table.
kable(study.table1horz)

cdd	deportlopez.Mean	deportlopez.StdDev	howfairly.Mean	howfairly.StdDev	deserved.Mean	deserved.StdDev	howstrong_deport.Mean	howstrong_deport.StdDev
control	2.19	1.67	2.41	1.75	2.00	1.52	3.01	1.98
treatment	2.78	1.69	2.96	1.89	2.13	1.62	2.74	1.91

Vertical

# Vertical table 1: stack columns 1-4 into long format (one row per answer),
# then summarise each item-by-condition combination with question1().
# The value column is named "Response" (previously misspelled "Repsonse"),
# because that name becomes the header prefix of the displayed table.
study.table1vert <-
  study %>%
  select(1:4, cdd) %>%
  pivot_longer(-cdd,
               names_to = "Survey",
               values_to = "Response"
               ) %>%
  group_by(Survey, cdd) %>%
  question1()

## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.

# Show the vertical table as an interactive widget ...
datatable(study.table1vert)

# ... and as a static table.
kable(study.table1vert)

Survey	cdd	Repsonse.Mean	Repsonse.StdDev
deportlopez	control	2.19	1.67
deportlopez	treatment	2.78	1.69
deserved	control	2.00	1.52
deserved	treatment	2.13	1.62
howfairly	control	2.41	1.75
howfairly	treatment	2.96	1.89
howstrong_deport	control	3.01	1.98
howstrong_deport	treatment	2.74	1.91

EDA

# Pairs plot of columns 1-4 with every column (including `cdd`) converted to
# a factor, coloured by condition.
ggpairs(
  mutate(
    select(study, 1:4, cdd),
    across(.cols = everything(), .fns = as_factor)
  ),
  mapping = ggplot2::aes(colour = cdd)
)

# Same pairs plot with columns 1-4 left as they are stored.
ggpairs(
  select(study, 1:4, cdd),
  mapping = ggplot2::aes(colour = cdd)
)

Problem 02

Horizontal

# Summarise every non-grouping column of `df` by its mean, standard
# deviation, median, first and third quartiles and interquartile range,
# ignoring missing values, and round the results to two decimals.
#
# df:      a data frame, normally grouped with group_by(); grouping columns
#          are kept as identifiers and are not summarised.
# returns: one row per group, with columns named "<column>.<statistic>".
question2 <-
  function(df){
    df %>%
      summarize(across(.cols = everything(),
                   .fns = list(Mean = ~ mean(.x, na.rm = TRUE),
                               StDev = ~ sd(.x, na.rm = TRUE),
                               Median = ~ median(.x, na.rm = TRUE),
                               Q1 = ~ quantile(.x, probs = 0.25, na.rm = TRUE),
                               # Third quartile is the 0.75 quantile; with any
                               # other probability Q3 - Q1 disagrees with IQR.
                               Q3 = ~ quantile(.x, probs = 0.75, na.rm = TRUE),
                               IQR = ~ IQR(.x, na.rm = TRUE)
                               ),
                   .names = "{.col}.{.fn}"
                   )
            ) %>%
      # Round only the numeric summaries. Selecting by type instead of by the
      # name `cdd` lets the helper work for any grouping column, and the
      # lambda avoids passing round()'s arguments through across(...).
      mutate(across(.cols = where(is.numeric),
                    .fns = ~ round(.x, digits = 2)
                    )
             )
  }

# Horizontal table 2: one row per condition with the question2() statistics
# for columns 6-8. Grouping by `cdd` before the positional select keeps `cdd`
# in the data (dplyr re-adds it and prints a message).
study.table2horz <- question2(select(group_by(study, cdd), 6:8))

## Adding missing grouping variables: `cdd`

# Show the horizontal table as an interactive widget ...
datatable(study.table2horz)

# ... and as a static table.
kable(study.table2horz)

cdd	blameMJ.Mean	blameMJ.StDev	blameMJ.Median	blameMJ.Q1	blameMJ.Q3	blameMJ.IQR	blameUS.Mean	blameUS.StDev	blameUS.Median	blameUS.Q1	blameUS.Q3	blameUS.IQR	blameES.Mean	blameES.StDev	blameES.Median	blameES.Q1	blameES.Q3	blameES.IQR
control	14.42	20.04	10	0	20.0	20.00	43.16	24.44	40	30.00	50	24.00	42.42	20.45	45	30.00	60	30.00
treatment	29.00	22.92	25	10	34.5	26.25	40.11	24.30	40	23.75	60	36.25	30.89	18.38	30	18.75	40	21.25

Vertical

# Vertical table 2: stack columns 6-8 into long format (one row per answer),
# then summarise each item-by-condition combination with question2().
# The value column is named "Response" (previously misspelled "Repsonse"),
# because that name becomes the header prefix of the displayed table.
study.table2vert <-
  study %>%
  select(6:8, cdd) %>%
  pivot_longer(-cdd,
               names_to = "Survey",
               values_to = "Response"
               ) %>%
  group_by(Survey, cdd) %>%
  question2()

## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.

# Show the vertical table as an interactive widget ...
datatable(study.table2vert)

# ... and as a static table.
kable(study.table2vert)

Survey	cdd	Repsonse.Mean	Repsonse.StDev	Repsonse.Median	Repsonse.Q1	Repsonse.Q3	Repsonse.IQR
blameES	control	42.42	20.45	45	30.00	60.0	30.00
blameES	treatment	30.89	18.38	30	18.75	40.0	21.25
blameMJ	control	14.42	20.04	10	0.00	20.0	20.00
blameMJ	treatment	29.00	22.92	25	10.00	34.5	26.25
blameUS	control	43.16	24.44	40	30.00	50.0	24.00
blameUS	treatment	40.11	24.30	40	23.75	60.0	36.25

EDA

# Pairs plot of columns 6-8, coloured by condition.
ggpairs(
  select(study, 6:8, cdd),
  mapping = ggplot2::aes(colour = cdd)
)

Problem 04

# Problem 04: side-by-side bar chart of answers to the deportation question,
# one bar per answer and condition.
study %>%
  # Only a missing `deportlopez` answer should exclude a respondent from this
  # chart; an unscoped drop_na() also discards rows with gaps in other columns.
  drop_na(deportlopez) %>%
  # A factor with explicit levels keeps the x axis in scale order (1 to 7).
  # Recoding to plain character strings would sort the bars alphabetically.
  mutate(deportlopez = factor(deportlopez,
                              levels = 1:7,
                              labels = c("Strongly Oppose",
                                         "Moderately Oppose",
                                         "Slightly Oppose",
                                         "Neither",
                                         "Slightly Support",
                                         "Moderately Support",
                                         "Strongly Support"
                                         )
                              )
         ) %>%
  ggplot(aes(deportlopez, fill = cdd)) +
  geom_bar(position = "dodge") +
  ggtitle("Do you support or oppose the decision to deport Miguel Lopez?") +
  # The category labels are long, so they are rotated to read vertically.
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 90,
                                   vjust = 0.25,
                                   hjust = 1
                                   )
        )

Problem 06

# Problem 06: horizontal boxplots of blameMJ, one box per condition.
ggplot(study, aes(x = blameMJ, fill = cdd)) +
  geom_boxplot() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  # The y axis only separates the two boxes, so its tick labels are hidden.
  theme(axis.text.y = element_blank())

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Problem 08

By Condition

# Problem 08: histogram of blameMJ with one panel per condition.
# geom_histogram() is left on its default binning.
ggplot(study, aes(x = blameMJ, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  facet_wrap(vars(cdd))

Extra

# Problem 08 (extra): histogram of blameMJ in a condition-by-sex grid.
# Rows with a missing `sex` are removed so that no NA column is drawn.
ggplot(drop_na(study, sex), aes(x = blameMJ)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  facet_grid(
    rows = vars(cdd),
    cols = vars(sex),
    labeller = "label_both"
  )

Problem 10

By Condition

# Problem 10: histogram of blameUS with one panel per condition.
# geom_histogram() is left on its default binning.
ggplot(study, aes(x = blameUS, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to U.S. Immigration?",
    x = "Percent"
  ) +
  facet_wrap(vars(cdd))

Extra

# Problem 10 (extra): histogram of blameUS in a condition-by-sex grid.
# Rows with a missing `sex` are removed so that no NA column is drawn.
ggplot(drop_na(study, sex), aes(x = blameUS)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to U.S. Immigration?",
    x = "Percent"
  ) +
  facet_grid(
    rows = vars(cdd),
    cols = vars(sex),
    labeller = "label_both"
  )

07.16.2020 Sara A:DAP1

Nathan at GrandValleyTutor.com

7/18/2021

Setup

Problem 01

Horizontal

Vertical

EDA

Problem 02

Horizontal

Vertical

EDA

Problem 04

Problem 06

Problem 08

By Condition

Extra

Problem 10

By Condition

Extra