library(foreign)
wvs = read.spss("wvs RU.sav", to.data.frame=TRUE)
names(wvs)
## [1] "S003" "S004" "S007" "S009" "S010" "S018" "S018a"
## [8] "S019" "S019a" "S021" "S021A" "S024" "S024A" "S025"
## [15] "S025A" "S026" "V1" "V1A" "V1B" "V2" "V2A"
## [22] "V3" "V4" "V5" "V6" "V7" "V8" "V9"
## [29] "V10" "V11" "V12" "V13" "V14" "V15" "V16"
## [36] "V17" "V18" "V19" "V20" "V21" "V22" "V23"
## [43] "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## [50] "V31" "V32" "V33" "V34" "V35" "V36" "V37"
## [57] "V38" "V39" "V40" "V41" "V42" "V43" "V44"
## [64] "V45" "V46" "V47" "V48" "V49" "V50" "V51"
## [71] "V52" "V53" "V54" "V55" "V56" "V57" "V58"
## [78] "V59" "V60" "V61" "V62" "V63" "V64" "V65"
## [85] "V66" "V67" "V68" "V69" "V69HK" "V70" "V70HK"
## [92] "V71" "V72" "V73" "V74HK" "V74" "V73HK" "V75"
## [99] "V76" "V77" "V78" "V79" "V80" "V81" "V82"
## [106] "V83" "V84" "V85" "V86" "V87" "V88" "V89"
## [113] "V90" "V91" "V92" "V93" "V94" "V95" "V96"
## [120] "V97" "V98" "V99" "V100" "V101" "V102" "V103"
## [127] "V104" "V105" "V106" "V107" "V108" "V109" "V110"
## [134] "V111" "V112" "V113" "V114" "V115" "V116" "V117"
## [141] "V118" "V119" "V120" "V121" "V122" "V123" "V124"
## [148] "V125" "V126" "V127" "V128" "V129" "V130" "V131"
## [155] "V132" "V133" "V134" "V135" "V136" "V137" "V138"
## [162] "V139" "V140" "V141" "V142" "V143" "V144" "V145"
## [169] "V146" "V147" "V148" "V149" "V150" "V151" "V152"
## [176] "V153" "V154" "V155" "V156" "V157" "V158" "V159"
## [183] "V160" "V161" "V162" "V163" "V164" "V165" "V166"
## [190] "V167" "V168" "V169" "V170" "V171" "V172" "V173"
## [197] "V174" "V175" "V176" "V177" "V178" "V179" "V180"
## [204] "V181" "V182" "V183" "V184" "V185" "V186" "V187"
## [211] "V188" "V189" "V190" "V191" "V192" "V193" "V194"
## [218] "V195" "V196" "V197" "V198" "V199" "V200" "V201"
## [225] "V202" "V203" "V204" "V205" "V206" "V207" "V208"
## [232] "V209" "V210" "V211" "V212" "V214" "V215" "V216"
## [239] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
## [246] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
## [253] "V231" "V232" "V233" "V233A" "V234" "V235" "V236"
## [260] "V237" "V238" "V239" "V240" "V241" "V242" "V243"
## [267] "V244" "V245" "V246" "V247" "V248" "V249" "V250"
## [274] "V251" "V252" "V252B" "V253" "V253CS" "V254" "V255"
## [281] "V255CS" "V256" "V257" "V258" "V259A" "V259" "V260"
## [288] "number" "country"
wvs.ext <- data.frame(wvs$V237, wvs$V55, wvs$V238, wvs$V253,
wvs$V10, wvs$V22, wvs$S009, wvs$V23,
wvs$V46, wvs$V205,
wvs$V4,wvs$V5,wvs$V6,wvs$V7,wvs$V8,wvs$V9)
names (wvs.ext) <- c ("age", "marital", "edu", "income",
"happy", "satisf", "country", "trust",
"choice", "divorce", "Family",
"Friends", "LeisureTime", "Politics",
"Work", "Religion")
For example:
class(wvs.ext$satisf)
## [1] "factor"
table(wvs.ext$satisf)
##
## Dissatisfied 2 3 4 5
## 767 427 902 869 2262
## 6 7 8 9 Satisfied
## 1135 1304 1363 621 941
wvs.ext$satisf1 <- ifelse(wvs.ext$satisf=="Satisfied",10,
ifelse(wvs.ext$satisf=="Dissatisfied",1,
wvs.ext$satisf))
table(wvs.ext$satisf, wvs.ext$satisf1)
##
## 1 2 3 4 5 6 7 8 9 10
## Dissatisfied 767 0 0 0 0 0 0 0 0 0
## 2 0 427 0 0 0 0 0 0 0 0
## 3 0 0 902 0 0 0 0 0 0 0
## 4 0 0 0 869 0 0 0 0 0 0
## 5 0 0 0 0 2262 0 0 0 0 0
## 6 0 0 0 0 0 1135 0 0 0 0
## 7 0 0 0 0 0 0 1304 0 0 0
## 8 0 0 0 0 0 0 0 1363 0 0
## 9 0 0 0 0 0 0 0 0 621 0
## Satisfied 0 0 0 0 0 0 0 0 0 941
wvs.ext$satisf <- as.numeric(wvs.ext$satisf)
plot(wvs.ext$happy)
wvs.ext$happy0 <- ifelse(wvs.ext$happy=="Not at all happy",1,
ifelse(wvs.ext$happy=="Not very happy",2,
ifelse(wvs.ext$happy=="Quite happy",3,
ifelse(wvs.ext$happy=="Very happy",4, NA))))
table(wvs.ext$happy, wvs.ext$happy0)
##
## 1 2 3 4
## Very happy 0 0 0 1079
## Quite happy 0 0 5516 0
## Not very happy 0 3161 0 0
## Not at all happy 507 0 0 0
wvs.ext$income1 <- ifelse (wvs.ext$income=="Eigth step",8,
ifelse(wvs.ext$income=="Fifth step",5,
ifelse(wvs.ext$income=="Fourth step",4,
ifelse(wvs.ext$income=="Lower step",1,
ifelse(wvs.ext$income=="Nineth step",9,
ifelse(wvs.ext$income=="second step",2,
ifelse(wvs.ext$income=="Seventh step",7,
ifelse(wvs.ext$income=="Sixth step",6,
ifelse(wvs.ext$income=="Tenth step",10,
ifelse(wvs.ext$income=="Third step",3,NA))))))))))
table(wvs.ext$income, wvs.ext$income1)
##
## 1 2 3 4 5 6 7 8 9 10
## Lower step 413 0 0 0 0 0 0 0 0 0
## second step 0 718 0 0 0 0 0 0 0 0
## Third step 0 0 1363 0 0 0 0 0 0 0
## Fourth step 0 0 0 1225 0 0 0 0 0 0
## Fifth step 0 0 0 0 1301 0 0 0 0 0
## Sixth step 0 0 0 0 0 872 0 0 0 0
## Seventh step 0 0 0 0 0 0 1002 0 0 0
## Eigth step 0 0 0 0 0 0 0 772 0 0
## Nineth step 0 0 0 0 0 0 0 0 512 0
## Tenth step 0 0 0 0 0 0 0 0 0 448
table(wvs.ext$choice)
##
## None at all 2 3 4 5
## 552 307 531 650 1786
## 6 7 8 9 A great deal
## 894 1021 1175 568 1239
wvs.ext$choice1 <- ifelse (wvs.ext$choice=="None at all",1,
ifelse(wvs.ext$choice=="A great deal",10,
wvs.ext$choice))
table(wvs.ext$choice1,wvs.ext$choice)
##
## None at all 2 3 4 5 6 7 8 9 A great deal
## 1 552 0 0 0 0 0 0 0 0 0
## 2 0 307 0 0 0 0 0 0 0 0
## 3 0 0 531 0 0 0 0 0 0 0
## 4 0 0 0 650 0 0 0 0 0 0
## 5 0 0 0 0 1786 0 0 0 0 0
## 6 0 0 0 0 0 894 0 0 0 0
## 7 0 0 0 0 0 0 1021 0 0 0
## 8 0 0 0 0 0 0 0 1175 0 0
## 9 0 0 0 0 0 0 0 0 568 0
## 10 0 0 0 0 0 0 0 0 0 1239
wvs.ext$happyIND<- rowMeans(wvs.ext[c('happy0','satisf1')], na.rm=T)
summary(wvs.ext$happyIND)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 3.000 4.000 4.262 5.500 10.000 60
hist(wvs.ext$happyIND)
class(wvs.ext$happyIND)
## [1] "numeric"
Standardization is the process of putting differeent variables on the same scale, as in this way it is easier to compare them
wvs.ext$happyINDscale <- scale (wvs.ext$happyIND)
hist(wvs.ext$happyINDscale)
“happyINDscale” is the dependent variable.
model1 <- lm(wvs.ext$happyINDscale~ as.numeric(wvs.ext$age) +
as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
summary(model1)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ as.numeric(wvs.ext$age) +
## as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9537 -0.5839 0.0046 0.5627 4.6403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8561183 0.0383086 -22.35 <2e-16 ***
## as.numeric(wvs.ext$age) -0.0090692 0.0006403 -14.16 <2e-16 ***
## as.numeric(wvs.ext$income1) 0.0482466 0.0040943 11.78 <2e-16 ***
## as.numeric(wvs.ext$choice) 0.1280611 0.0039149 32.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8773 on 7928 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1816, Adjusted R-squared: 0.1813
## F-statistic: 586.6 on 3 and 7928 DF, p-value: < 2.2e-16
Interpretation of the results of regression:
- p-value estimates show that all the predictors (age,income, choice are significant)
- With one unit increase in age, the happiness level decreases by 0,0090692 (The older the person the lower his or her level of happiness)
- With one unit increase in income, the happiness level increases by 0.0482466 (The richer the person the happier he or she is)
- With one unit increase in choice variable^ the happiness level increases by 0.1280611 (The more freedom the one has, the happier he or she is)
par(mfrow=c(2,2))
plot(model1)
library(car)
## Loading required package: carData
outlierTest(model1)
## rstudent unadjusted p-value Bonferroni p
## 9356 5.300615 1.1852e-07 0.00094012
## 4910 5.169721 2.4017e-07 0.00190500
## 4162 5.051331 4.4853e-07 0.00355780
## 980 4.797230 1.6381e-06 0.01299400
## 3625 4.796166 1.6468e-06 0.01306300
## 8979 4.723898 2.3535e-06 0.01866800
## 4287 4.565357 5.0616e-06 0.04014900
qqPlot(model1, main="QQ plot")
## [1] 4910 9356
leveragePlots(model1)
We have two extreme outliers 4910 and 9356
- Ok, the older the person is, the lower the level of happiness is;
- and the richer the person is, the happier hi/she is.
- And we can suggest that rich people are happier even in old age.
wvs.ext$age <- as.numeric(wvs.ext$age)
wvs.ext$income1 <- as.numeric(wvs.ext$income1)
wvs.ext$choice <- as.numeric(wvs.ext$choice)
model2 <- lm(wvs.ext$happyINDscale~ choice + age * income1 ,
data= wvs.ext)
summary(model2)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ choice + age * income1,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9269 -0.5843 0.0024 0.5612 4.6413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8167208 0.0534746 -15.273 < 2e-16 ***
## choice 0.1278022 0.0039225 32.581 < 2e-16 ***
## age -0.0104389 0.0014465 -7.217 5.82e-13 ***
## income1 0.0407445 0.0081998 4.969 6.87e-07 ***
## age:income1 0.0002793 0.0002645 1.056 0.291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8772 on 7927 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1818, Adjusted R-squared: 0.1813
## F-statistic: 440.2 on 4 and 7927 DF, p-value: < 2.2e-16
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
plot_model(model2, type = "int")
The interactive effect is insignificant, which means that with age the level of happiness decreases equally for both the poor and the rich.
If older people feel they have completely free choice and control over their lives,are they happier than those who think “no choice at all”?
model3 <- lm(wvs.ext$happyINDscale~ age * choice + income1 ,
data= wvs.ext)
summary(model3)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ age * choice + income1,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8555 -0.5720 -0.0009 0.5641 4.6403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6454136 0.0570204 -11.319 < 2e-16 ***
## age -0.0158706 0.0015073 -10.529 < 2e-16 ***
## choice 0.0943615 0.0078117 12.080 < 2e-16 ***
## income1 0.0472345 0.0040932 11.540 < 2e-16 ***
## age:choice 0.0011569 0.0002322 4.983 6.4e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8759 on 7927 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1842, Adjusted R-squared: 0.1838
## F-statistic: 447.5 on 4 and 7927 DF, p-value: < 2.2e-16
plot_model(model3, type = "int")
Interpretation of the result:
With age the level of hahappiness for those who have no freedom at all decreases more rapidly, than for those who have a great deal of freedom
Now select one or two variables (from “wvs.ext” dataset), add interactive effect in model2, build a plot and interpret a new result.
model4 <- lm(wvs.ext$happyINDscale~ income1 + choice + age * trust ,
data= wvs.ext)
summary(model4)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ income1 + choice + age *
## trust, data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9372 -0.5815 0.0061 0.5595 4.4663
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.824860 0.052183 -15.807 < 2e-16 ***
## income1 0.048832 0.004143 11.786 < 2e-16 ***
## choice 0.126242 0.003970 31.801 < 2e-16 ***
## age -0.006840 0.001230 -5.561 2.77e-08 ***
## trustCanґt be too careful -0.027938 0.046373 -0.602 0.547
## age:trustCanґt be too careful -0.003164 0.001440 -2.197 0.028 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8725 on 7656 degrees of freedom
## (3119 observations deleted due to missingness)
## Multiple R-squared: 0.1832, Adjusted R-squared: 0.1827
## F-statistic: 343.5 on 5 and 7656 DF, p-value: < 2.2e-16
plot_model(model4, type = "int")
Interpretation of the result:
With age the level of happiness for those, who don’t trust people around them decreases more rapidly,than for those, who believe that most people can be trusted (maybe people’s mistrust leads to higher anxiety levels and as a result makes their psychological state not that stable, making their happiness level to decrease)