Setup
library(tidyverse)
library(magrittr)
library(DT)
library(knitr)
library(GGally)
# Load the raw survey export and discard its `null` column.
study <- select(
  read_csv("G:/My Drive/homework/Sara A/study100200_51+2.csv"),
  -null
)

# Interactive preview of the raw data, five rows per page.
datatable(study, options = list(pageLength = 5))

# Keep only conditions 3 and 4 and relabel them as the factor `cdd`
# (factor() sorts the codes, so the lower code becomes "control" and the
# higher one "treatment"); the numeric `condition` column is then dropped.
study <- study %>%
  filter(condition %in% c(3, 4)) %>%
  mutate(cdd = factor(condition, labels = c("control", "treatment"))) %>%
  select(-condition)

# Numeric overview of every remaining column.
summary(study)
## deportlopez howfairly deserved howstrong_deport
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :1.000 Median :2.000
## Mean :2.477 Mean :2.677 Mean :2.065 Mean :2.877
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :1 NA's :2 NA's :2
## blame_choose1 blameMJ blameUS blameES
## Min. :1.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:2.000 1st Qu.: 0.00 1st Qu.: 25.00 1st Qu.: 20.00
## Median :2.000 Median : 20.00 Median : 40.00 Median : 35.00
## Mean :2.237 Mean : 21.57 Mean : 41.66 Mean : 36.77
## 3rd Qu.:3.000 3rd Qu.: 30.00 3rd Qu.: 60.00 3rd Qu.: 50.00
## Max. :3.000 Max. :100.00 Max. :100.00 Max. :100.00
## NA's :4 NA's :1 NA's :1 NA's :1
## study sex pid7 cdd
## Min. :100.0 Min. :0.0000 Min. :1.000 control :80
## 1st Qu.:100.0 1st Qu.:0.0000 1st Qu.:1.000 treatment:76
## Median :200.0 Median :1.0000 Median :2.000
## Mean :155.8 Mean :0.6623 Mean :2.449
## 3rd Qu.:200.0 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :200.0 Max. :1.0000 Max. :7.000
## NA's :2
# Same overview, but with every column except columns 6-8 converted to a
# factor first, so summary() reports counts per level for those columns.
summary(
  mutate(study, across(.cols = -c(6:8), .fns = as_factor))
)
## deportlopez howfairly deserved howstrong_deport blame_choose1
## 1 :65 1 :59 1 :92 1 :51 1 :24
## 2 :32 2 :32 2 :17 2 :34 2 :68
## 3 :17 4 :19 4 :15 4 :19 3 :60
## 4 :15 3 :17 3 :13 3 :18 NA's: 4
## 5 :15 6 :13 5 :10 7 :13
## (Other):11 (Other):15 (Other): 7 (Other):19
## NA's : 1 NA's : 1 NA's : 2 NA's : 2
## blameMJ blameUS blameES study sex pid7
## Min. : 0.00 Min. : 0.00 Min. : 0.00 100:69 0 : 52 1:50
## 1st Qu.: 0.00 1st Qu.: 25.00 1st Qu.: 20.00 200:87 1 :102 2:42
## Median : 20.00 Median : 40.00 Median : 35.00 NA's: 2 3:30
## Mean : 21.57 Mean : 41.66 Mean : 36.77 4:21
## 3rd Qu.: 30.00 3rd Qu.: 60.00 3rd Qu.: 50.00 5: 7
## Max. :100.00 Max. :100.00 Max. :100.00 6: 3
## NA's :1 NA's :1 NA's :1 7: 3
## cdd
## control :80
## treatment:76
##
##
##
##
##
# study %>%
# mutate(across(.cols = -c(6:8), as_factor)) %>%
# ggpairs()
Problem 01
Horizontal
#' Per-group mean and standard deviation of every non-grouping column.
#'
#' @param df A (typically grouped) data frame; its non-grouping columns are
#'   expected to be numeric.
#' @return One row per group. For each input column `x` the result holds
#'   `x.Mean` and `x.StdDev`, computed with missing values removed and
#'   rounded to two decimals.
question1 <- function(df) {
  df %>%
    summarize(
      across(
        .cols = everything(),
        .fns = list(
          Mean = ~ mean(.x, na.rm = TRUE),
          StdDev = ~ sd(.x, na.rm = TRUE)
        ),
        .names = "{.col}.{.fn}"
      )
    ) %>%
    # Round only the numeric summaries. Selecting by type leaves factor or
    # character identifier columns (e.g. `cdd`) untouched without naming
    # them, and the lambda avoids handing `digits` to across()'s deprecated
    # `...` argument.
    mutate(across(.cols = where(is.numeric), .fns = ~ round(.x, digits = 2)))
}
# Horizontal layout: one row per condition with a Mean/StdDev column pair for
# each of the first four columns. Because the data are already grouped,
# select() re-adds the grouping column `cdd` on its own.
study.table1horz <- study %>%
  group_by(cdd) %>%
  select(1:4) %>%
  # drop_na() %>%
  question1()
## Adding missing grouping variables: `cdd`
# Show the horizontal table twice: as an interactive widget and as a static
# knitr table.
datatable(study.table1horz)
kable(study.table1horz)
control |
2.19 |
1.67 |
2.41 |
1.75 |
2.00 |
1.52 |
3.01 |
1.98 |
treatment |
2.78 |
1.69 |
2.96 |
1.89 |
2.13 |
1.62 |
2.74 |
1.91 |
Vertical
# Vertical layout: reshape the first four columns to long form (one row per
# respondent and question), then summarise per question and condition.
study.table1vert <- study %>%
  select(1:4, cdd) %>%
  pivot_longer(
    -cdd,
    names_to = "Survey",
    # Spelled "Response" (previously "Repsonse"): this name becomes the prefix
    # of the summary columns, e.g. `Response.Mean`.
    values_to = "Response"
  ) %>%
  group_by(Survey, cdd) %>%
  question1()
## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.
# Show the vertical table twice: as an interactive widget and as a static
# knitr table.
datatable(study.table1vert)
kable(study.table1vert)
deportlopez |
control |
2.19 |
1.67 |
deportlopez |
treatment |
2.78 |
1.69 |
deserved |
control |
2.00 |
1.52 |
deserved |
treatment |
2.13 |
1.62 |
howfairly |
control |
2.41 |
1.75 |
howfairly |
treatment |
2.96 |
1.89 |
howstrong_deport |
control |
3.01 |
1.98 |
howstrong_deport |
treatment |
2.74 |
1.91 |
EDA
# Pairwise plots of the first four columns, coloured by condition, with every
# column converted to a factor first.
study %>%
  select(1:4, cdd) %>%
  mutate(across(.cols = everything(), .fns = as_factor)) %>%
  ggpairs(mapping = aes(colour = cdd))

# The same pairwise plots with the columns left in their original types.
study %>%
  select(1:4, cdd) %>%
  ggpairs(mapping = aes(colour = cdd))

Problem 02
Horizontal
#' Per-group location and spread summaries of every non-grouping column.
#'
#' @param df A (typically grouped) data frame; its non-grouping columns are
#'   expected to be numeric.
#' @return One row per group. For each input column `x` the result holds
#'   `x.Mean`, `x.StDev`, `x.Median`, `x.Q1`, `x.Q3` and `x.IQR`, computed
#'   with missing values removed and rounded to two decimals.
question2 <- function(df) {
  df %>%
    summarize(
      across(
        .cols = everything(),
        .fns = list(
          Mean = ~ mean(.x, na.rm = TRUE),
          StDev = ~ sd(.x, na.rm = TRUE),
          Median = ~ median(.x, na.rm = TRUE),
          Q1 = ~ quantile(.x, probs = 0.25, na.rm = TRUE),
          # The third quartile is the 0.75 quantile (this previously used
          # 0.73, so Q3 - Q1 did not agree with the IQR column).
          Q3 = ~ quantile(.x, probs = 0.75, na.rm = TRUE),
          IQR = ~ IQR(.x, na.rm = TRUE)
        ),
        .names = "{.col}.{.fn}"
      )
    ) %>%
    # Round only the numeric summaries. Selecting by type leaves factor or
    # character identifier columns (e.g. `cdd`) untouched without naming
    # them, and the lambda avoids handing `digits` to across()'s deprecated
    # `...` argument.
    mutate(across(.cols = where(is.numeric), .fns = ~ round(.x, digits = 2)))
}
# Horizontal layout: one row per condition with six summary columns for each
# of columns 6-8. Because the data are already grouped, select() re-adds the
# grouping column `cdd` on its own.
study.table2horz <- study %>%
  group_by(cdd) %>%
  select(6:8) %>%
  question2()
## Adding missing grouping variables: `cdd`
# Show the horizontal table twice: as an interactive widget and as a static
# knitr table.
datatable(study.table2horz)
kable(study.table2horz)
control |
14.42 |
20.04 |
10 |
0 |
20.0 |
20.00 |
43.16 |
24.44 |
40 |
30.00 |
50 |
24.00 |
42.42 |
20.45 |
45 |
30.00 |
60 |
30.00 |
treatment |
29.00 |
22.92 |
25 |
10 |
34.5 |
26.25 |
40.11 |
24.30 |
40 |
23.75 |
60 |
36.25 |
30.89 |
18.38 |
30 |
18.75 |
40 |
21.25 |
Vertical
# Vertical layout: reshape columns 6-8 to long form (one row per respondent
# and question), then summarise per question and condition.
study.table2vert <- study %>%
  select(6:8, cdd) %>%
  pivot_longer(
    -cdd,
    names_to = "Survey",
    # Spelled "Response" (previously "Repsonse"): this name becomes the prefix
    # of the summary columns, e.g. `Response.Mean`.
    values_to = "Response"
  ) %>%
  group_by(Survey, cdd) %>%
  question2()
## `summarise()` has grouped output by 'Survey'. You can override using the `.groups` argument.
# Show the vertical table twice: as an interactive widget and as a static
# knitr table.
datatable(study.table2vert)
kable(study.table2vert)
blameES |
control |
42.42 |
20.45 |
45 |
30.00 |
60.0 |
30.00 |
blameES |
treatment |
30.89 |
18.38 |
30 |
18.75 |
40.0 |
21.25 |
blameMJ |
control |
14.42 |
20.04 |
10 |
0.00 |
20.0 |
20.00 |
blameMJ |
treatment |
29.00 |
22.92 |
25 |
10.00 |
34.5 |
26.25 |
blameUS |
control |
43.16 |
24.44 |
40 |
30.00 |
50.0 |
24.00 |
blameUS |
treatment |
40.11 |
24.30 |
40 |
23.75 |
60.0 |
36.25 |
EDA
# Pairwise plots of columns 6-8, coloured by condition.
ggpairs(
  select(study, 6:8, cdd),
  mapping = aes(colour = cdd)
)

Problem 04
# Dodged bar chart of answers to the deportation question, split by condition.
study %>%
  # Exclude a respondent only when this question is unanswered; a bare
  # drop_na() would also discard rows with gaps in unrelated columns.
  drop_na(deportlopez) %>%
  mutate(
    # A factor with explicit levels keeps the x axis in scale order (1 to 7).
    # Plain character labels would be sorted alphabetically by ggplot.
    deportlopez = factor(
      deportlopez,
      levels = 1:7,
      labels = c(
        "Strongly Oppose",
        "Moderately Oppose",
        "Slightly Oppose",
        "Neither",
        "Slightly Support",
        "Moderately Support",
        "Strongly Support"
      )
    )
  ) %>%
  ggplot(aes(deportlopez, fill = cdd)) +
  geom_bar(position = "dodge") +
  ggtitle("Do you support or oppose the decision to deport Miguel Lopez?") +
  theme(
    axis.title.x = element_blank(),
    # Rotate the long category labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.25, hjust = 1)
  )

Problem 06
# Box plots of `blameMJ` on the x axis, one box per condition (fill colour).
# The y-axis tick labels are hidden.
ggplot(study, aes(blameMJ, fill = cdd)) +
  geom_boxplot() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  theme(axis.text.y = element_blank())
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Problem 08
By Condition
# Histograms of `blameMJ`, one panel per condition.
ggplot(study, aes(blameMJ, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to Miguel Lopez himself?",
    x = "Percent"
  ) +
  facet_wrap(vars(cdd))

Problem 10
By Condition
# Histograms of `blameUS`, one panel per condition.
ggplot(study, aes(blameUS, fill = cdd)) +
  geom_histogram() +
  labs(
    title = "How much blame should be given to U.S. Immigration?",
    x = "Percent"
  ) +
  facet_wrap(vars(cdd))
