library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggplot2)
library(ggpubr)
library(sjPlot)
load('WVS_Cross-National_Wave_7_rData_v5_0.rdata')
cyprus <- subset(`WVS_Cross-National_Wave_7_v5_0`, B_COUNTRY == 196)
table(cyprus$Q186)
##
## -2 -1 1 2 3 4 5 6 7 8 9 10
## 25 49 219 51 60 51 142 84 89 57 26 147
table(cyprus$Q260) #1-male, 2-female
##
## 1 2
## 482 518
table(cyprus$Q262)
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## 10 8 10 20 26 31 29 27 28 30 27 21 20 15 15 21 15 25 20 27 9 24 11 20 27 12
## 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
## 21 15 10 16 12 15 12 17 26 28 27 8 8 6 14 20 23 22 16 19 17 23 11 14 11 6
## 70 71 72 73 74 75 76 77 79 82 88 92
## 8 4 13 6 7 2 6 4 1 2 1 1
table(cyprus$Q275R)
##
## -2 -1 1 2 3
## 27 2 223 258 490
table(cyprus$Q206)
##
## -2 -1 1 2 3 4 5
## 22 2 575 130 31 21 219
table(cyprus$Y001)
##
## -2 0 1 2 3 4 5
## 72 71 171 284 236 131 35
cyprus$justsex <- cyprus$Q186
cyprus$gndr <- cyprus$Q260
cyprus$age <- cyprus$Q262
cyprus$edulvl <- cyprus$Q275R
cyprus$intinf <- cyprus$Q206
cyprus$postmat <-cyprus$Y001
library(dplyr)
hw <- select(cyprus, c(justsex, gndr, age, edulvl, intinf, postmat))
hw <- filter(hw, justsex != "-2" & justsex != "-1" & edulvl != "-2" & edulvl != "-1" & intinf != "-2" & intinf != "-1" & postmat != "-2")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~justsex + gndr + age + edulvl + intinf + postmat, data = hw)
Overall (N=824) |
|
---|---|
justsex | |
Mean (SD) | 5.26 (3.15) |
Median [Min, Max] | 5.00 [1.00, 10.0] |
gndr | |
Mean (SD) | 1.51 (0.500) |
Median [Min, Max] | 2.00 [1.00, 2.00] |
age | |
Mean (SD) | 43.7 (16.2) |
Median [Min, Max] | 42.0 [18.0, 92.0] |
edulvl | |
Mean (SD) | 2.28 (0.803) |
Median [Min, Max] | 3.00 [1.00, 3.00] |
intinf | |
Mean (SD) | 2.16 (1.64) |
Median [Min, Max] | 1.00 [1.00, 5.00] |
postmat | |
Mean (SD) | 2.31 (1.25) |
Median [Min, Max] | 2.00 [0, 5.00] |
hw$gndrr <- ifelse(hw$gndr == 1, "male", "female")
table(hw$gndrr)
##
## female male
## 423 401
hw$edulvlr <- ifelse(hw$edulvl == 1, "lower",
ifelse(hw$edulvl == 2, "middle", "higher"))
table(hw$edulvlr)
##
## higher lower middle
## 414 182 228
hw$intinfr <- ifelse(hw$intinf == 1, "daily",
ifelse(hw$intinf == 2, "weekly",
ifelse(hw$intinf == 3, "monthly",
ifelse(hw$intinf == 4, "less than monthly", "never"))))
table(hw$intinfr)
##
## daily less than monthly monthly never
## 483 18 26 184
## weekly
## 113
hw$postmat <- hw$postmat + 1
Estimate a model to predict justification of sexual relationship before marriage (Q186) by age (Q262), gender (Q260), highest educational level (3 groups) (Q275R), frequency of Internet use as a source of information (Q206), 12-item index of postmaterialism (Y001), as well as an interaction between age and gender (1 point).
m_hw <- lm(justsex ~ edulvlr + intinfr + postmat + age * gndrr, data = hw)
summary(m_hw)
##
## Call:
## lm(formula = justsex ~ edulvlr + intinfr + postmat + age * gndrr,
## data = hw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.838 -2.698 0.105 2.455 6.468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.44028 0.57727 11.156 < 2e-16 ***
## edulvlrlower 0.43409 0.35222 1.232 0.218139
## edulvlrmiddle 1.26236 0.26741 4.721 2.77e-06 ***
## intinfrless than monthly 0.13683 0.73906 0.185 0.853161
## intinfrmonthly -0.21218 0.61855 -0.343 0.731664
## intinfrnever -0.94095 0.35154 -2.677 0.007586 **
## intinfrweekly -1.20768 0.32237 -3.746 0.000192 ***
## postmat -0.11097 0.08758 -1.267 0.205497
## age -0.01766 0.01162 -1.520 0.128926
## gndrrmale -1.90292 0.62293 -3.055 0.002326 **
## age:gndrrmale 0.03813 0.01345 2.836 0.004681 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.07 on 813 degrees of freedom
## Multiple R-squared: 0.06006, Adjusted R-squared: 0.0485
## F-statistic: 5.195 on 10 and 813 DF, p-value: 2.065e-07
Interpretation: intercept, middle level of education over higher, never using internet as a source of information and using internet weekly as a source of information over daily usage, male over female and interaction between gender and age all show significance (as p-values are less than 0.05). While for lower education level, less than monthly and monthly internet use as a source of information, post materialism index and age didn’t show significant difference.
Intercept: intercept is significant, meaning that female at 18 years, whose postmaterialism index equal to 1, using internet as a source of information daily and higher education, the justification of sex before marriage is 6.4.
Lower edu: We do not see a significant difference in justification of sex before marriage between lower and higher (baseline category) educational levels.
Middle edu: The effect of level of middle education over higher on justification of sex before marriage is significant (p-value 2.77e-06) and positive (1.26). So having a middle level of education increases the level of justificantion of sex before marriage by 1.26
Less than monthly intinf: We do not see a significant difference in justification of sex before marriage between those who use internet as a source of info less than monthly and daily (baseline category).
Monthly intinf: We do not see a significant difference in justification of sex before marriage between those who use internet as a source of info monthly and daily (baseline category).
Never intinf: The effect of never using internet as a source of info on justification of sex before marriage is significant (.007) and negative (-.94), meaning that never using net for info decreases the justification of sex before marriage by .94
Weekly intinf: The effect of using internet weekly as a source of info on justification of sex before marriage is significant (.0001) and negative (-1.21), meaning that using net for info weekly decreases the justification of sex before marriage by 1.21
Postmat: We do not see a significant difference in justification of sex before marriage between different post materialism indexes.
Age: We do not see a significant difference in justification of sex before marriage based on the age of the person. But as the interaction effect is significant we can conclude that effect of age is modified by gender.
Male gndr: There is a significant difference between male and female in justification of sex before marriage (p-value .02). For younger people being a female decreases the level of justification of sex before marriage by .02. While for men the value increases by (-.02 + .04) .02.
Model fit: R-squared is. equal to ~ 0.06, which is really low level of explanation of out dependent variable (justsex), only 6%. Thus, we need to run a few diagnostic tests to see if we can improve the model fit.
plot(m_hw,1)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
boxcox(m_hw)
Interpretation: the red line is almost coincides with the dashed line, however, there is some break in the linearity around 5, so we can conclude that the linearity assumption is violated. We can also already notice some outliers in the left upper corner: 22364, 22541 and 22173. As for boxcox, the lambda is in between 0 and 1. Possibly, there is a need for log transformation.
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
ols_test_normality(m_hw)
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9658 0.0000
## Kolmogorov-Smirnov 0.0729 3e-04
## Cramer-von Mises 53.0791 0.0000
## Anderson-Darling 7.004 0.0000
## -----------------------------------------------
hist (m_hw$residuals, breaks = 20)
car::qqPlot(m_hw, main="Q-Q Plot")
## 22364 22541
## 254 418
car::outlierTest(m_hw)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferroni p
## 22364 2.125716 0.033828 NA
Interpretation: residuals are non-normally distributed as p-value in smaller than 0.05. The histogram confirms non-normal distribution of residuals, as well as qq plot. On the qq plot there are also some outliers visible: 22541 and 22364. The Bonferroni p gives out NA, which can suggest that it is not significant.
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
spreadLevelPlot(m_hw)
##
## Suggested power transformation: 0.8888452
car::ncvTest(m_hw)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.8241778, Df = 1, p = 0.36396
Interpretation: the formal test results show p-value bigger than 0.05 (~0.36), thus we cannot reject the null hypothesis (that there is homoscedasticity in residuals), meaning that there is no heteroscedasticity in the residuals. While on the spread Level Plot we can notice that blue line is not horizontal, there is some angle with which it rises.
car::vif(m_hw, type = 'predictor')
## GVIFs computed for predictors
## GVIF Df GVIF^(1/(2*Df)) Interacts With Other Predictors
## edulvlr 1.696632 2 1.141292 -- intinfr, postmat, age, gndrr
## intinfr 1.770956 4 1.074054 -- edulvlr, postmat, age, gndrr
## postmat 1.040405 1 1.020003 -- edulvlr, intinfr, age, gndrr
## age 1.930418 3 1.115857 gndrr edulvlr, intinfr, postmat
## gndrr 1.930418 3 1.115857 age edulvlr, intinfr, postmat
Interpretation: All variables have lower than 4 GVIF scores, thus they do not explain the same parts of the model, and we do not need to delete any.
plot(hatvalues(m_hw), rstudent(m_hw), type='n')
abline(h=c(-2, 2), lty=2)
abline(v=c(2,3)*3/110, lty=2)
cook <- sqrt(cooks.distance(m_hw))
points(hatvalues(m_hw), rstudent(m_hw), cex=10*cook/max(cook))
text(hatvalues(m_hw), rstudent(m_hw), rownames(hw))
ols_plot_cooksd_bar(m_hw)
Interpretation: On the first plot we can see quite a
lot of outliers, e.g. 22615 or 22830, 22661, etc. On the Cooks plot we
can see quite a lot of outliers as well.
hw_new = hw[-which(rownames(hw)=="22350"|
rownames(hw)=="22830"|
rownames(hw)=="22661"|
rownames(hw)=="22167"|
rownames(hw)=="22753"|
rownames(hw)=="22660"|
rownames(hw)=="22455"|
rownames(hw)=="23055"|
rownames(hw)=="23062"|
rownames(hw)=="22364"|
rownames(hw)=="22541"|
rownames(hw)=="22173"|
rownames(hw)=="22615"|
rownames(hw)=="22511"|
rownames(hw)=="22714"|
rownames(hw)=="22676"),]
Here I deleted the biggest outliers that showed themselves on different plots.
m_new <- lm(justsex ~ edulvlr + intinfr + postmat + age * gndrr, data = hw_new)
summary(m_new)
##
## Call:
## lm(formula = justsex ~ edulvlr + intinfr + postmat + age * gndrr,
## data = hw_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9770 -2.6014 0.0427 2.3716 5.8679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.67804 0.57734 11.567 < 2e-16 ***
## edulvlrlower 0.47194 0.35154 1.342 0.17982
## edulvlrmiddle 1.42150 0.26733 5.317 1.37e-07 ***
## intinfrless than monthly 1.78936 0.92717 1.930 0.05397 .
## intinfrmonthly 0.07794 0.64789 0.120 0.90427
## intinfrnever -0.91771 0.35056 -2.618 0.00902 **
## intinfrweekly -1.32695 0.32095 -4.135 3.93e-05 ***
## postmat -0.12926 0.08751 -1.477 0.14002
## age -0.02258 0.01165 -1.937 0.05308 .
## gndrrmale -2.18066 0.61995 -3.517 0.00046 ***
## age:gndrrmale 0.04345 0.01339 3.246 0.00122 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.033 on 799 degrees of freedom
## Multiple R-squared: 0.07928, Adjusted R-squared: 0.06776
## F-statistic: 6.88 on 10 and 799 DF, p-value: 2.258e-10
sjPlot::tab_model(m_hw, m_new)
 | justsex | justsex | ||||
---|---|---|---|---|---|---|
Predictors | Estimates | CI | p | Estimates | CI | p |
(Intercept) | 6.44 | 5.31 – 7.57 | <0.001 | 6.68 | 5.54 – 7.81 | <0.001 |
edulvlr [lower] | 0.43 | -0.26 – 1.13 | 0.218 | 0.47 | -0.22 – 1.16 | 0.180 |
edulvlr [middle] | 1.26 | 0.74 – 1.79 | <0.001 | 1.42 | 0.90 – 1.95 | <0.001 |
intinfr [less than monthly] |
0.14 | -1.31 – 1.59 | 0.853 | 1.79 | -0.03 – 3.61 | 0.054 |
intinfr [monthly] | -0.21 | -1.43 – 1.00 | 0.732 | 0.08 | -1.19 – 1.35 | 0.904 |
intinfr [never] | -0.94 | -1.63 – -0.25 | 0.008 | -0.92 | -1.61 – -0.23 | 0.009 |
intinfr [weekly] | -1.21 | -1.84 – -0.57 | <0.001 | -1.33 | -1.96 – -0.70 | <0.001 |
postmat | -0.11 | -0.28 – 0.06 | 0.205 | -0.13 | -0.30 – 0.04 | 0.140 |
age | -0.02 | -0.04 – 0.01 | 0.129 | -0.02 | -0.05 – 0.00 | 0.053 |
gndrr [male] | -1.90 | -3.13 – -0.68 | 0.002 | -2.18 | -3.40 – -0.96 | <0.001 |
age × gndrr [male] | 0.04 | 0.01 – 0.06 | 0.005 | 0.04 | 0.02 – 0.07 | 0.001 |
Observations | 824 | 810 | ||||
R2 / R2 adjusted | 0.060 / 0.049 | 0.079 / 0.068 |
Interpretation:
Intercept: intercept is significant, meaning that female at 18 years, whose postmaterialism index equal to 1, using internet as a source of information daily and higher education, the justification of sex before marriage is 6.68.
Lower edu: We do not see a significant difference in justification of sex before marriage between lower and higher (baseline category) educational levels.
Middle edu: The effect of level of middle education over higher on justification of sex before marriage is significant (p-value 1.37e-07) and positive (1.42). So having a middle level of education increases the level of justificantion of sex before marriage by 1.42
Less than monthly intinf: We do not see a significant difference in justification of sex before marriage between those who use internet as a source of info less than monthly and daily (baseline category).
Monthly intinf: We do not see a significant difference in justification of sex before marriage between those who use internet as a source of info monthly and daily (baseline category).
Never intinf: The effect of never using internet as a source of info on justification of sex before marriage is significant (.009) and negative ( -.91), meaning that never using net for info decreases the justification of sex before marriage by .91
Weekly intinf: The effect of using internet weekly as a source of info on justification of sex before marriage is significant (3.93e-05) and negative (-1.33), meaning that using net for info weekly decreases the justification of sex before marriage by 1.33
Postmat: We do not see a significant difference in justification of sex before marriage between different post materialism indexes.
Age: We do not see a significant difference in justification of sex
before marriage based on the age of the person.
Male gndr: There is a significant difference between male and female in
justification of sex before marriage (p-value <.001). For younger
people being a female decreases the level of justification of sex before
marriage by -.02. While for men the value increases by .02.
Model fit increased from 0.06 to 0.08 by deleting the outliers.
library(nortest)
car::qqPlot(m_new, main="Q-Q Plot")
## 22232 22748
## 129 569
hist (m_new$residuals, breaks = 20)
ad.test(m_new$residuals)
##
## Anderson-Darling normality test
##
## data: m_new$residuals
## A = 6.0592, p-value = 6.637e-15
Tests and plots still do not show the decent picture and normal distribution of data, thus, we can conclude that more adjustments are needed.
sjPlot::plot_model(m_new, type = 'pred', terms = c('age', 'gndrr'))
Interpretation: here we can see that with the increase of age women tend to justify sex before marriage less, while for men the effect is opposite: men’s level of justification of sex before marriage with age increases visibly.