Data
library(foreign)
library(foreign)
library(car)
library(MASS)
library(tidyverse)
library(psych)
library(rstatix)
library(reshape2)
library(readr)
library(gridExtra)
library(dplyr)
library(effsize)
# Load the ESS SPSS export as a data frame.
data <- read.spss(
  "/Users/admin/Downloads/ESS8e02_1.sav",
  to.data.frame = TRUE
)
Combine groups in the data
library(dplyr)
# Keep only the two variables of interest and collapse the 0-10 climate
# impact scale into three groups: "neg" (0-3), "mid" (4-6) and "god" (7-10).
# The scale endpoints are stored under their text labels. Anything that does
# not match (including NA) becomes "unknown" and is dropped again.
data <- data %>%
  select(ccgdbd, gndr) %>%
  mutate(
    ccgdbd1 = case_when(
      ccgdbd %in% c("Extremely bad", "1", "2", "3") ~ "neg",
      ccgdbd %in% c("4", "5", "6") ~ "mid",
      ccgdbd %in% c("7", "8", "9", "Extremely good") ~ "god",
      TRUE ~ "unknown"
    )
  ) %>%
  filter(ccgdbd1 != "unknown")
Q ccgdbd: How good or bad do you think the impact of climate change will be on people across the world? Please choose a number from 0 to 10, where 0 is extremely bad and 10 is extremely good.
Research hypothesis: Females and males have different opinions toward the impact of climate change.
Check assumptions
# The recoded attitude is still a character vector at this point.
class(data[["ccgdbd1"]])
## [1] "character"
# Convert it to a factor so that both variables are categorical.
data[["ccgdbd1"]] <- as.factor(data[["ccgdbd1"]])
class(data[["ccgdbd1"]])
## [1] "factor"
class(data[["gndr"]])
## [1] "factor"
# Build the attitude-by-gender contingency table once, then print it.
data_tbl <- table(data$ccgdbd1, data$gndr)
data_tbl
##
## Male Female
## god 1699 1770
## mid 6531 7434
## neg 11334 12459
Assumptions are met
H0: There is no difference in attitudes towards the impact of climate change between males and females. HA: There is a difference in attitudes towards the impact of climate change between males and females.
Visualization
library(sjPlot)
# Stacked bar chart of gender shares within each attitude group
# (row percentages), with the chi-squared summary printed on the plot.
plot_xtab(
  data$ccgdbd1,
  data$gndr,
  margin = "row",
  bar.pos = "stack",
  show.summary = TRUE
)
Based on the graph, there appears to be no association between gender and attitude towards climate change. Let’s run the test.
Chi-square
# Pearson chi-squared test of independence between attitude group and gender.
# The result is stored so its residuals can be inspected, then printed.
chi <- chisq.test(data$ccgdbd1, data$gndr)
chi
##
## Pearson's Chi-squared test
##
## data: data$ccgdbd1 and data$gndr
## X-squared = 6.1849, df = 2, p-value = 0.04539
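P-value < 0.05 –> We reject H0 in favour of HA. It means that there is a statistically significant difference in attitudes towards climate change between males and females. Note, however, that p = 0.045 is only just below the threshold and the sample is very large (over 41,000 respondents), so the underlying effect is likely to be very small.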
# Standardized residuals show which cells deviate from what independence
# would predict, and in which direction.
stdres <- chi$stdres
stdres
## data$gndr
## data$ccgdbd1 Male Female
## god 1.8762290 -1.8762290
## mid -2.0005472 2.0005472
## neg 0.8622589 -0.8622589
library(corrplot)
# is.corr = FALSE because the matrix holds residuals, not correlations.
corrplot(
  stdres,
  is.corr = FALSE,
  method = "num"
)
There are somewhat fewer females who think that the impact of climate change will be good than we would expect under independence, and somewhat more males who think so (standardized residuals of ±1.88, just short of the usual ±1.96 threshold). At the same time, there are more females with a moderate opinion than expected and fewer males with a moderate opinion (residuals of ±2.00, only marginally beyond the threshold). The deviations are therefore small rather than large.
Data cleaning
# Start again from the raw file, this time keeping the full 0-10 scale.
data1 <- read.spss("/Users/admin/Downloads/ESS8e02_1.sav", to.data.frame = TRUE) %>%
  select(ccgdbd, gndr)
class(data1$ccgdbd)
## [1] "factor"
# The scale endpoints are stored under their text labels. Map them to "0"
# and "10" so that the whole column can be converted to numeric.
data1 <- data1 %>%
  mutate(
    ccgdbd = as.character(ccgdbd),
    ccgdbd = case_when(
      ccgdbd == "Extremely bad" ~ "0",
      ccgdbd == "Extremely good" ~ "10",
      TRUE ~ ccgdbd
    ),
    ccgdbd = as.numeric(ccgdbd)
  )
# Cross-tabulate score by gender (table() leaves out NA values by default).
table(data1)
## gndr
## ccgdbd Male Female
## 0 2632 2824
## 1 1646 1960
## 2 3381 3466
## 3 3675 4209
## 4 2419 2765
## 5 3112 3486
## 6 1000 1183
## 7 844 966
## 8 539 555
## 9 146 114
## 10 170 135
# A comparison with NA yields NA, and filter() drops such rows, so this keeps
# only respondents with a score on the 0-10 scale.
data1 <- filter(data1, ccgdbd <= 10)
table(data1)
## gndr
## ccgdbd Male Female
## 0 2632 2824
## 1 1646 1960
## 2 3381 3466
## 3 3675 4209
## 4 2419 2765
## 5 3112 3486
## 6 1000 1183
## 7 844 966
## 8 539 555
## 9 146 114
## 10 170 135
# Inspect the structure of the cleaned data frame.
str(data1)
## 'data.frame': 41232 obs. of 2 variables:
## $ ccgdbd: num 10 2 2 7 5 5 5 3 4 5 ...
## $ gndr : Factor w/ 2 levels "Male","Female": 2 1 2 1 2 2 2 2 2 2 ...
## - attr(*, "variable.labels")= Named chr [1:534] "Title of dataset" "ESS round" "Edition" "Production date" ...
## ..- attr(*, "names")= chr [1:534] "name" "essround" "edition" "proddate" ...
## - attr(*, "codepage")= int 65001
Assumptions
library(ggplot2)
library(dbplyr)
library(gridExtra)
# Split the sample by gender first. The subsets are created as ordinary
# top-level assignments instead of being hidden inside the ggplot() call.
data_f <- data1 %>%
  filter(gndr == "Female")
data_m <- data1 %>%
  filter(gndr == "Male")

# One histogram of the 0-10 score per gender. The titles identify the panels
# once they are placed side by side.
P1 <- ggplot(data_f, aes(x = ccgdbd)) +
  geom_histogram(binwidth = 0.5) +
  ggtitle("Female")
P2 <- ggplot(data_m, aes(x = ccgdbd)) +
  geom_histogram(binwidth = 0.5) +
  ggtitle("Male")

# Show both distributions next to each other for visual comparison.
grid.arrange(P1, P2, ncol = 2)
Based on the graph we see that the data are clearly not normal: there are two peaks and the distribution is right-skewed. However, our sample is very large (which is also why the shapiro.test function does not work: it accepts at most 5000 observations). Therefore, by the central limit theorem, the sampling distribution of the group means is approximately normal, and the t-test can still be applied.
# Boxplot of the 0-10 score by gender. Respondents with missing gender are
# removed with is.na(); comparing against the string "NA" does not test for
# missing values and only worked because filter() drops NA comparisons.
ggplot(
  data1 %>%
    filter(!is.na(gndr)),
  aes(x = gndr, y = ccgdbd)
) +
  geom_boxplot()
# Levene's test for equality of the score variances across genders.
leveneTest(ccgdbd ~ gndr, data = data1)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 4.524 0.03343 *
## 41225
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
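Levene's test is significant (p = 0.033 < 0.05), so the assumption of equal variances is rejected: the variances are not equal. This is not a problem here, because t.test() applies the Welch correction by default (var.equal = FALSE), which does not require equal variances. If we rely on the CLT for normality of the group means, the test can therefore be applied.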
# Two-sided Welch t-test (no equal-variance assumption) comparing the mean
# score of males and females. The arguments below are the function defaults,
# written out for clarity.
t.test(
  data1$ccgdbd ~ data1$gndr,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: data1$ccgdbd by data1$gndr
## t = 0.15752, df = 40610, p-value = 0.8748
## alternative hypothesis: true difference in means between group Male and group Female is not equal to 0
## 95 percent confidence interval:
## -0.03913373 0.04597326
## sample estimates:
## mean in group Male mean in group Female
## 3.266357 3.262937
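The p-value is very large (p = 0.87): the mean scores of males (3.27) and females (3.26) are practically identical, so we cannot reject the hypothesis of equal means.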
Let’s check non-parametric test..
# Two-sided Wilcoxon rank-sum test as a non-parametric check. The formula
# already refers to the data1 columns, so no separate data argument is needed.
wilcox.test(
  data1$ccgdbd ~ data1$gndr,
  alternative = "two.sided",
  correct = TRUE
)
##
## Wilcoxon rank sum test with continuity correction
##
## data: data1$ccgdbd by data1$gndr
## W = 211045844, p-value = 0.4705
## alternative hypothesis: true location shift is not equal to 0
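The p-value is also very large (p = 0.47), so we cannot reject H0. The t-test and the Wilcoxon test agree with each other; they only appear to contradict the chi-square test above. The tests answer different questions: the t-test and the Wilcoxon test compare the average level (location) of the 0–10 scores, whereas the chi-square test compares the whole distribution across the three collapsed categories. Males are slightly over-represented at both ends of the scale and females in the middle, so the distributions differ a little while the averages stay practically the same.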
library(cowsay)
# Print the closing message with a randomly chosen ASCII animal.
say(
  what = "Data analysis is often confusing",
  by = "random"
)
##
## -----
## Data analysis is often confusing
## ------
## \ ^__^
## \ (oo)\ ________
## (__)\ )\ /\
## ||------w|
## || ||