The practical task includes three types of action:
If there are no errors in the script, you can knit a document, and send me doc, Html or pdf file.
Open the notebook and read about the variables that we work with.
## [1] "S003" "S004" "S007" "S009" "S010" "S018" "S018a"
## [8] "S019" "S019a" "S021" "S021A" "S024" "S024A" "S025"
## [15] "S025A" "S026" "V1" "V1A" "V1B" "V2" "V2A"
## [22] "V3" "V4" "V5" "V6" "V7" "V8" "V9"
## [29] "V10" "V11" "V12" "V13" "V14" "V15" "V16"
## [36] "V17" "V18" "V19" "V20" "V21" "V22" "V23"
## [43] "V24" "V25" "V26" "V27" "V28" "V29" "V30"
## [50] "V31" "V32" "V33" "V34" "V35" "V36" "V37"
## [57] "V38" "V39" "V40" "V41" "V42" "V43" "V44"
## [64] "V45" "V46" "V47" "V48" "V49" "V50" "V51"
## [71] "V52" "V53" "V54" "V55" "V56" "V57" "V58"
## [78] "V59" "V60" "V61" "V62" "V63" "V64" "V65"
## [85] "V66" "V67" "V68" "V69" "V69HK" "V70" "V70HK"
## [92] "V71" "V72" "V73" "V74HK" "V74" "V73HK" "V75"
## [99] "V76" "V77" "V78" "V79" "V80" "V81" "V82"
## [106] "V83" "V84" "V85" "V86" "V87" "V88" "V89"
## [113] "V90" "V91" "V92" "V93" "V94" "V95" "V96"
## [120] "V97" "V98" "V99" "V100" "V101" "V102" "V103"
## [127] "V104" "V105" "V106" "V107" "V108" "V109" "V110"
## [134] "V111" "V112" "V113" "V114" "V115" "V116" "V117"
## [141] "V118" "V119" "V120" "V121" "V122" "V123" "V124"
## [148] "V125" "V126" "V127" "V128" "V129" "V130" "V131"
## [155] "V132" "V133" "V134" "V135" "V136" "V137" "V138"
## [162] "V139" "V140" "V141" "V142" "V143" "V144" "V145"
## [169] "V146" "V147" "V148" "V149" "V150" "V151" "V152"
## [176] "V153" "V154" "V155" "V156" "V157" "V158" "V159"
## [183] "V160" "V161" "V162" "V163" "V164" "V165" "V166"
## [190] "V167" "V168" "V169" "V170" "V171" "V172" "V173"
## [197] "V174" "V175" "V176" "V177" "V178" "V179" "V180"
## [204] "V181" "V182" "V183" "V184" "V185" "V186" "V187"
## [211] "V188" "V189" "V190" "V191" "V192" "V193" "V194"
## [218] "V195" "V196" "V197" "V198" "V199" "V200" "V201"
## [225] "V202" "V203" "V204" "V205" "V206" "V207" "V208"
## [232] "V209" "V210" "V211" "V212" "V214" "V215" "V216"
## [239] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
## [246] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
## [253] "V231" "V232" "V233" "V233A" "V234" "V235" "V236"
## [260] "V237" "V238" "V239" "V240" "V241" "V242" "V243"
## [267] "V244" "V245" "V246" "V247" "V248" "V249" "V250"
## [274] "V251" "V252" "V252B" "V253" "V253CS" "V254" "V255"
## [281] "V255CS" "V256" "V257" "V258" "V259A" "V259" "V260"
## [288] "number" "country"
Find the variables.
wvs.ext <- data.frame(wvs$V237, wvs$V55, wvs$V238, wvs$V253,
wvs$V10, wvs$V22, wvs$S009, wvs$V23,
wvs$V46, wvs$V205,
wvs$V4,wvs$V5,wvs$V6,wvs$V7,wvs$V8,wvs$V9)
names (wvs.ext) <- c ("age", "marital", "edu", "income",
"happy", "satisf", "country", "trust",
"choice", "divorce", "Family",
"Friends", "LeisureTime", "Politics",
"Work", "Religion")
Check the type of variables and RECODE each variable if necessary. For example.
class(wvs.ext$satisf)
## [1] "factor"
table(wvs.ext$satisf)
##
## Dissatisfied 2 3 4 5
## 767 427 902 869 2262
## 6 7 8 9 Satisfied
## 1135 1304 1363 621 941
wvs.ext$satisf1 <- ifelse(wvs.ext$satisf=="Satisfied",10,
ifelse(wvs.ext$satisf=="Dissatisfied",1,
wvs.ext$satisf))
table(wvs.ext$satisf, wvs.ext$satisf1)
##
## 1 2 3 4 5 6 7 8 9 10
## Dissatisfied 767 0 0 0 0 0 0 0 0 0
## 2 0 427 0 0 0 0 0 0 0 0
## 3 0 0 902 0 0 0 0 0 0 0
## 4 0 0 0 869 0 0 0 0 0 0
## 5 0 0 0 0 2262 0 0 0 0 0
## 6 0 0 0 0 0 1135 0 0 0 0
## 7 0 0 0 0 0 0 1304 0 0 0
## 8 0 0 0 0 0 0 0 1363 0 0
## 9 0 0 0 0 0 0 0 0 621 0
## Satisfied 0 0 0 0 0 0 0 0 0 941
wvs.ext$satisf <- as.numeric(wvs.ext$satisf)
## happy
plot(wvs.ext$happy)
wvs.ext$happy0 <- ifelse(wvs.ext$happy=="Not at all happy",1,
ifelse(wvs.ext$happy=="Not very happy",2,
ifelse(wvs.ext$happy=="Quite happy",3,
ifelse(wvs.ext$happy=="Very happy",4, NA))))
table(wvs.ext$happy, wvs.ext$happy0)
##
## 1 2 3 4
## Very happy 0 0 0 1079
## Quite happy 0 0 5516 0
## Not very happy 0 3161 0 0
## Not at all happy 507 0 0 0
## income
wvs.ext$income1 <- ifelse (wvs.ext$income=="Eigth step",8,
ifelse(wvs.ext$income=="Fifth step",5,
ifelse(wvs.ext$income=="Fourth step",4,
ifelse(wvs.ext$income=="Lower step",1,
ifelse(wvs.ext$income=="Nineth step",9,
ifelse(wvs.ext$income=="second step",2,
ifelse(wvs.ext$income=="Seventh step",7,
ifelse(wvs.ext$income=="Sixth step",6,
ifelse(wvs.ext$income=="Tenth step",10,
ifelse(wvs.ext$income=="Third step",3,NA))))))))))
table(wvs.ext$income, wvs.ext$income1)
##
## 1 2 3 4 5 6 7 8 9 10
## Lower step 413 0 0 0 0 0 0 0 0 0
## second step 0 718 0 0 0 0 0 0 0 0
## Third step 0 0 1363 0 0 0 0 0 0 0
## Fourth step 0 0 0 1225 0 0 0 0 0 0
## Fifth step 0 0 0 0 1301 0 0 0 0 0
## Sixth step 0 0 0 0 0 872 0 0 0 0
## Seventh step 0 0 0 0 0 0 1002 0 0 0
## Eigth step 0 0 0 0 0 0 0 772 0 0
## Nineth step 0 0 0 0 0 0 0 0 512 0
## Tenth step 0 0 0 0 0 0 0 0 0 448
## choice
table(wvs.ext$choice)
##
## None at all 2 3 4 5
## 552 307 531 650 1786
## 6 7 8 9 A great deal
## 894 1021 1175 568 1239
wvs.ext$choice1 <- ifelse (wvs.ext$choice=="None at all",1,
ifelse(wvs.ext$choice=="A great deal",10,
wvs.ext$choice))
table(wvs.ext$choice1,wvs.ext$choice)
##
## None at all 2 3 4 5 6 7 8 9 A great deal
## 1 552 0 0 0 0 0 0 0 0 0
## 2 0 307 0 0 0 0 0 0 0 0
## 3 0 0 531 0 0 0 0 0 0 0
## 4 0 0 0 650 0 0 0 0 0 0
## 5 0 0 0 0 1786 0 0 0 0 0
## 6 0 0 0 0 0 894 0 0 0 0
## 7 0 0 0 0 0 0 1021 0 0 0
## 8 0 0 0 0 0 0 0 1175 0 0
## 9 0 0 0 0 0 0 0 0 568 0
## 10 0 0 0 0 0 0 0 0 0 1239
Creating index of happiness
wvs.ext$happyIND<- rowMeans(wvs.ext[c('happy0','satisf1')], na.rm=T)
summary(wvs.ext$happyIND)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 3.000 4.000 4.262 5.500 10.000 60
hist(wvs.ext$happyIND)
class(wvs.ext$happyIND)
## [1] "numeric"
Can you remember what standardization means and why it necessary?
wvs.ext$happyINDscale <- scale (wvs.ext$happyIND)
hist(wvs.ext$happyINDscale)
“happyINDscale” is the dependent variable.
model1 <- lm(wvs.ext$happyINDscale~ as.numeric(wvs.ext$age) +
as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
summary(model1)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ as.numeric(wvs.ext$age) +
## as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9537 -0.5839 0.0046 0.5627 4.6403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8561183 0.0383086 -22.35 <2e-16 ***
## as.numeric(wvs.ext$age) -0.0090692 0.0006403 -14.16 <2e-16 ***
## as.numeric(wvs.ext$income1) 0.0482466 0.0040943 11.78 <2e-16 ***
## as.numeric(wvs.ext$choice) 0.1280611 0.0039149 32.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8773 on 7928 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1816, Adjusted R-squared: 0.1813
## F-statistic: 586.6 on 3 and 7928 DF, p-value: < 2.2e-16
Interpret the results of regression.
Here we look at the model with relation of 3 predictors (age and income of a respondent and their feeling of having choice how to live or not) with one outcome - respondet’s level of happiness. This model is significant (p < .05), explains about 18% of the variability of happiness, which is not bad! As for the predictors:
outliers and leverage?
In this model we do have a couple of outliers (4910, 9356), and don’t have levarages (which would influence the results of the model)
plot(model1)
library(car)
## Loading required package: carData
qqPlot(model1, main="QQ Plot")
## [1] 4910 9356
INTERACTION MODELS
Ok, the older the person is, the lower the level of happiness is; and the richer the person is, the happier hi/she is. And we can suggest that rich people are happier even in old age. Let’s test this hypothesis! (as i said, too!)
wvs.ext$age <- as.numeric(wvs.ext$age)
wvs.ext$income1 <- as.numeric(wvs.ext$income1)
wvs.ext$choice <- as.numeric(wvs.ext$choice)
model2 <- lm(wvs.ext$happyINDscale~ choice + age * income1 ,
data= wvs.ext)
summary(model2)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ choice + age * income1,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9269 -0.5843 0.0024 0.5612 4.6413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8167208 0.0534746 -15.273 < 2e-16 ***
## choice 0.1278022 0.0039225 32.581 < 2e-16 ***
## age -0.0104389 0.0014465 -7.217 5.82e-13 ***
## income1 0.0407445 0.0081998 4.969 6.87e-07 ***
## age:income1 0.0002793 0.0002645 1.056 0.291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8772 on 7927 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1818, Adjusted R-squared: 0.1813
## F-statistic: 440.2 on 4 and 7927 DF, p-value: < 2.2e-16
library(sjPlot)
# browseVignettes("sjPlot")
plot_model(model2, type = "int")
The interaction effect is insignificant, which means that with age the level of happiness decreases equally for both the poor and the rich.
What about the choice? If older people feel they have completely free choice and control over their lives, are they happier than those who think “no choice at all”?
model3 <- lm(wvs.ext$happyINDscale~ age * choice + income1 ,
data= wvs.ext)
summary(model3)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ age * choice + income1,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8555 -0.5720 -0.0009 0.5641 4.6403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6454136 0.0570204 -11.319 < 2e-16 ***
## age -0.0158706 0.0015073 -10.529 < 2e-16 ***
## choice 0.0943615 0.0078117 12.080 < 2e-16 ***
## income1 0.0472345 0.0040932 11.540 < 2e-16 ***
## age:choice 0.0011569 0.0002322 4.983 6.4e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8759 on 7927 degrees of freedom
## (2849 observations deleted due to missingness)
## Multiple R-squared: 0.1842, Adjusted R-squared: 0.1838
## F-statistic: 447.5 on 4 and 7927 DF, p-value: < 2.2e-16
plot_model(model3, type = "int")
Please interpret the result.
Yes, for now the resuls are significant: people, who do have feeling of choice in their lives are happier, than those, aho don’t, even if their level of happiness is descreasing as they become older.
Now select one or two variables (from “wvs.ext” dataset), add interaction effect in model1, build a plot and interpret a new result.
## trust
summary(wvs.ext$trust)
## Most people can be trusted CanТ‘t be too careful
## 2821 7395
## NA's
## 565
table(wvs.ext$trust)
##
## Most people can be trusted CanТ‘t be too careful
## 2821 7395
wvs.ext$trust1 <- ifelse (wvs.ext$trust=="CanТ‘t be too careful",1,
ifelse(wvs.ext$trust=="Most people can be trusted",10,
wvs.ext$trust))
table(wvs.ext$trust1,wvs.ext$trust)
##
## Most people can be trusted CanТ‘t be too careful
## 1 0 7395
## 10 2821 0
model77 <- lm(wvs.ext$happyINDscale ~ trust1,
data= wvs.ext)
summary(model77)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ trust1, data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2345 -0.6103 -0.1216 0.6890 3.7763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.066967 0.012908 -5.188 2.17e-07 ***
## trust1 0.018212 0.002426 7.509 6.47e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9846 on 10180 degrees of freedom
## (599 observations deleted due to missingness)
## Multiple R-squared: 0.005508, Adjusted R-squared: 0.00541
## F-statistic: 56.38 on 1 and 10180 DF, p-value: 6.472e-14
model7 <- lm(wvs.ext$happyINDscale ~ income1 + age * trust1,
data= wvs.ext)
summary(model7)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ income1 + age * trust1,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6099 -0.6192 -0.0306 0.6356 4.3167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1112880 0.0354871 -3.136 0.001719 **
## income1 0.0710286 0.0042211 16.827 < 2e-16 ***
## age -0.0138409 0.0008616 -16.064 < 2e-16 ***
## trust1 -0.0023656 0.0052067 -0.454 0.649598
## age:trust1 0.0005814 0.0001618 3.593 0.000328 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9258 on 8276 degrees of freedom
## (2500 observations deleted due to missingness)
## Multiple R-squared: 0.07893, Adjusted R-squared: 0.07849
## F-statistic: 177.3 on 4 and 8276 DF, p-value: < 2.2e-16
plot_model(model7, type = "int")
After several trials I’ve decided to look at the trustful people and see how happy are they. My assumption is that trust will have positive relation to respondents’ happiness. This assumption is supported bw the results of model77. But it’s very weak and has the liniest explanatory power, so… ignore it :)
It’s more interesting to see, how ‘trust’ variable may bу implemented to the model with age and income, which was created earlier. That is model7. This model is significant, explains 8% of the variability of happiness (less, than before, not still not bad, i would say!) Here we may see, that trust itself doesn’t play any role (as it’s relation is insignificant), BUT it IS significant in interaction with respondents’ age :) In short: yes, age has a negative relation to respondent’s happiness, HOWEVER if respondent is trustful toward other people, they would be happier than those, who don’t trust. This is visualized be the graph, too.
SO, people, trust each other and your level of happiness will decrease a little bit slower :)
I’ve decided not to delete my draft work, because it may be interesting to look at my mistakes, or maybe not… Anyways, here it is
I am interested in family relation to the respondents happiness: if people, for whom family is important are less or more happy, than contrasting cases. I also want to see it with income level. Maybe I’ll try to look also at how importance of leisure time relate to this picture, too.
model4 <- lm(wvs.ext$happyINDscale ~ as.numeric(wvs.ext$Family))
summary(model4)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ as.numeric(wvs.ext$Family))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1004 -0.5667 -0.1514 0.8230 4.3057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.26065 0.02793 9.333 <2e-16 ***
## as.numeric(wvs.ext$Family) -0.27959 0.02235 -12.512 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9664 on 9395 degrees of freedom
## (1384 observations deleted due to missingness)
## Multiple R-squared: 0.01639, Adjusted R-squared: 0.01628
## F-statistic: 156.5 on 1 and 9395 DF, p-value: < 2.2e-16
model5 <- lm(wvs.ext$happyINDscale ~ as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$Family))
summary(model5)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ as.numeric(wvs.ext$income1) +
## as.numeric(wvs.ext$Family))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4520 -0.6270 -0.0096 0.6723 4.4568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.150179 0.036856 -4.075 4.65e-05 ***
## as.numeric(wvs.ext$income1) 0.075095 0.004218 17.802 < 2e-16 ***
## as.numeric(wvs.ext$Family) -0.268115 0.023004 -11.655 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9426 on 8531 degrees of freedom
## (2247 observations deleted due to missingness)
## Multiple R-squared: 0.05243, Adjusted R-squared: 0.05221
## F-statistic: 236 on 2 and 8531 DF, p-value: < 2.2e-16
model6 <- lm(wvs.ext$happyINDscale ~ income1 * Family + age ,
data= wvs.ext)
summary(model6)
##
## Call:
## lm(formula = wvs.ext$happyINDscale ~ income1 * Family + age,
## data = wvs.ext)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6257 -0.6214 -0.0305 0.6316 4.4225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0888302 0.0318345 -2.790 0.005276
## income1 0.0682823 0.0045400 15.040 < 2e-16
## FamilyRather important -0.2971644 0.0707142 -4.202 2.67e-05
## FamilyNot very important -0.4936819 0.1467973 -3.363 0.000774
## FamilyNot at all important -0.1179495 0.3062957 -0.385 0.700185
## age -0.0109575 0.0006518 -16.810 < 2e-16
## income1:FamilyRather important 0.0044181 0.0121622 0.363 0.716415
## income1:FamilyNot very important 0.0210534 0.0289980 0.726 0.467841
## income1:FamilyNot at all important -0.0683376 0.0644538 -1.060 0.289058
##
## (Intercept) **
## income1 ***
## FamilyRather important ***
## FamilyNot very important ***
## FamilyNot at all important
## age ***
## income1:FamilyRather important
## income1:FamilyNot very important
## income1:FamilyNot at all important
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.927 on 8520 degrees of freedom
## (2252 observations deleted due to missingness)
## Multiple R-squared: 0.08419, Adjusted R-squared: 0.08333
## F-statistic: 97.9 on 8 and 8520 DF, p-value: < 2.2e-16
plot_model(model6, type = "int")
SO
I’d like to focus on models 4 and 6 I’ve added FAMILY variable to the model, assuming that people, for whom Family is important (very important), would be happier, than contrasting group. And also, I wanted to look at this relation with income level, how we’ve before. And here are the results, which were a bit contr0intuitive for me.
ONCE AGAIN
. . . . . . . . .