library(foreign)
wvs = read.spss("wvs RU.sav", to.data.frame=TRUE)
names(wvs)

##   [1] "S003"    "S004"    "S007"    "S009"    "S010"    "S018"    "S018a"  
##   [8] "S019"    "S019a"   "S021"    "S021A"   "S024"    "S024A"   "S025"   
##  [15] "S025A"   "S026"    "V1"      "V1A"     "V1B"     "V2"      "V2A"    
##  [22] "V3"      "V4"      "V5"      "V6"      "V7"      "V8"      "V9"     
##  [29] "V10"     "V11"     "V12"     "V13"     "V14"     "V15"     "V16"    
##  [36] "V17"     "V18"     "V19"     "V20"     "V21"     "V22"     "V23"    
##  [43] "V24"     "V25"     "V26"     "V27"     "V28"     "V29"     "V30"    
##  [50] "V31"     "V32"     "V33"     "V34"     "V35"     "V36"     "V37"    
##  [57] "V38"     "V39"     "V40"     "V41"     "V42"     "V43"     "V44"    
##  [64] "V45"     "V46"     "V47"     "V48"     "V49"     "V50"     "V51"    
##  [71] "V52"     "V53"     "V54"     "V55"     "V56"     "V57"     "V58"    
##  [78] "V59"     "V60"     "V61"     "V62"     "V63"     "V64"     "V65"    
##  [85] "V66"     "V67"     "V68"     "V69"     "V69HK"   "V70"     "V70HK"  
##  [92] "V71"     "V72"     "V73"     "V74HK"   "V74"     "V73HK"   "V75"    
##  [99] "V76"     "V77"     "V78"     "V79"     "V80"     "V81"     "V82"    
## [106] "V83"     "V84"     "V85"     "V86"     "V87"     "V88"     "V89"    
## [113] "V90"     "V91"     "V92"     "V93"     "V94"     "V95"     "V96"    
## [120] "V97"     "V98"     "V99"     "V100"    "V101"    "V102"    "V103"   
## [127] "V104"    "V105"    "V106"    "V107"    "V108"    "V109"    "V110"   
## [134] "V111"    "V112"    "V113"    "V114"    "V115"    "V116"    "V117"   
## [141] "V118"    "V119"    "V120"    "V121"    "V122"    "V123"    "V124"   
## [148] "V125"    "V126"    "V127"    "V128"    "V129"    "V130"    "V131"   
## [155] "V132"    "V133"    "V134"    "V135"    "V136"    "V137"    "V138"   
## [162] "V139"    "V140"    "V141"    "V142"    "V143"    "V144"    "V145"   
## [169] "V146"    "V147"    "V148"    "V149"    "V150"    "V151"    "V152"   
## [176] "V153"    "V154"    "V155"    "V156"    "V157"    "V158"    "V159"   
## [183] "V160"    "V161"    "V162"    "V163"    "V164"    "V165"    "V166"   
## [190] "V167"    "V168"    "V169"    "V170"    "V171"    "V172"    "V173"   
## [197] "V174"    "V175"    "V176"    "V177"    "V178"    "V179"    "V180"   
## [204] "V181"    "V182"    "V183"    "V184"    "V185"    "V186"    "V187"   
## [211] "V188"    "V189"    "V190"    "V191"    "V192"    "V193"    "V194"   
## [218] "V195"    "V196"    "V197"    "V198"    "V199"    "V200"    "V201"   
## [225] "V202"    "V203"    "V204"    "V205"    "V206"    "V207"    "V208"   
## [232] "V209"    "V210"    "V211"    "V212"    "V214"    "V215"    "V216"   
## [239] "V217"    "V218"    "V219"    "V220"    "V221"    "V222"    "V223"   
## [246] "V224"    "V225"    "V226"    "V227"    "V228"    "V229"    "V230"   
## [253] "V231"    "V232"    "V233"    "V233A"   "V234"    "V235"    "V236"   
## [260] "V237"    "V238"    "V239"    "V240"    "V241"    "V242"    "V243"   
## [267] "V244"    "V245"    "V246"    "V247"    "V248"    "V249"    "V250"   
## [274] "V251"    "V252"    "V252B"   "V253"    "V253CS"  "V254"    "V255"   
## [281] "V255CS"  "V256"    "V257"    "V258"    "V259A"   "V259"    "V260"   
## [288] "number"  "country"

PART 1. Preparing the data

Find the variables.

wvs.ext <- data.frame(wvs$V237, wvs$V55, wvs$V238, wvs$V253, 
                      wvs$V10, wvs$V22, wvs$S009, wvs$V23,
                      wvs$V46, wvs$V205,
                      wvs$V4,wvs$V5,wvs$V6,wvs$V7,wvs$V8,wvs$V9)
names (wvs.ext) <- c ("age", "marital", "edu", "income", 
                      "happy", "satisf", "country", "trust", 
                      "choice", "divorce", "Family", 
                      "Friends", "LeisureTime", "Politics",
                      "Work", "Religion")

Check the type of variables and RECODE each variable if necessary.

For example:

class(wvs.ext$satisf)

## [1] "factor"

table(wvs.ext$satisf)

## 
## Dissatisfied            2            3            4            5 
##          767          427          902          869         2262 
##            6            7            8            9    Satisfied 
##         1135         1304         1363          621          941

wvs.ext$satisf1 <-   ifelse(wvs.ext$satisf=="Satisfied",10,
                             ifelse(wvs.ext$satisf=="Dissatisfied",1,
                                    wvs.ext$satisf))
table(wvs.ext$satisf, wvs.ext$satisf1)

##               
##                   1    2    3    4    5    6    7    8    9   10
##   Dissatisfied  767    0    0    0    0    0    0    0    0    0
##   2               0  427    0    0    0    0    0    0    0    0
##   3               0    0  902    0    0    0    0    0    0    0
##   4               0    0    0  869    0    0    0    0    0    0
##   5               0    0    0    0 2262    0    0    0    0    0
##   6               0    0    0    0    0 1135    0    0    0    0
##   7               0    0    0    0    0    0 1304    0    0    0
##   8               0    0    0    0    0    0    0 1363    0    0
##   9               0    0    0    0    0    0    0    0  621    0
##   Satisfied       0    0    0    0    0    0    0    0    0  941

wvs.ext$satisf <- as.numeric(wvs.ext$satisf)

happy

plot(wvs.ext$happy)

wvs.ext$happy0 <-   ifelse(wvs.ext$happy=="Not at all happy",1,
                            ifelse(wvs.ext$happy=="Not very happy",2,
                                   ifelse(wvs.ext$happy=="Quite happy",3,
                                          ifelse(wvs.ext$happy=="Very happy",4, NA))))
table(wvs.ext$happy, wvs.ext$happy0)

##                   
##                       1    2    3    4
##   Very happy          0    0    0 1079
##   Quite happy         0    0 5516    0
##   Not very happy      0 3161    0    0
##   Not at all happy  507    0    0    0

income

wvs.ext$income1 <- ifelse (wvs.ext$income=="Eigth step",8, 
                            ifelse(wvs.ext$income=="Fifth step",5, 
                                   ifelse(wvs.ext$income=="Fourth step",4, 
                                          ifelse(wvs.ext$income=="Lower step",1,
                                                 ifelse(wvs.ext$income=="Nineth step",9,
                                                        ifelse(wvs.ext$income=="second step",2,
                                                               ifelse(wvs.ext$income=="Seventh step",7,
                                                                      ifelse(wvs.ext$income=="Sixth step",6, 
                                                                             ifelse(wvs.ext$income=="Tenth step",10,
                                                                                    ifelse(wvs.ext$income=="Third step",3,NA))))))))))                                                                               
table(wvs.ext$income, wvs.ext$income1)

##               
##                   1    2    3    4    5    6    7    8    9   10
##   Lower step    413    0    0    0    0    0    0    0    0    0
##   second step     0  718    0    0    0    0    0    0    0    0
##   Third step      0    0 1363    0    0    0    0    0    0    0
##   Fourth step     0    0    0 1225    0    0    0    0    0    0
##   Fifth step      0    0    0    0 1301    0    0    0    0    0
##   Sixth step      0    0    0    0    0  872    0    0    0    0
##   Seventh step    0    0    0    0    0    0 1002    0    0    0
##   Eigth step      0    0    0    0    0    0    0  772    0    0
##   Nineth step     0    0    0    0    0    0    0    0  512    0
##   Tenth step      0    0    0    0    0    0    0    0    0  448

choice

table(wvs.ext$choice)

## 
##  None at all            2            3            4            5 
##          552          307          531          650         1786 
##            6            7            8            9 A great deal 
##          894         1021         1175          568         1239

wvs.ext$choice1 <- ifelse (wvs.ext$choice=="None at all",1,
                            ifelse(wvs.ext$choice=="A great deal",10, 
                                   wvs.ext$choice))                                                                               
table(wvs.ext$choice1,wvs.ext$choice)

##     
##      None at all    2    3    4    5    6    7    8    9 A great deal
##   1          552    0    0    0    0    0    0    0    0            0
##   2            0  307    0    0    0    0    0    0    0            0
##   3            0    0  531    0    0    0    0    0    0            0
##   4            0    0    0  650    0    0    0    0    0            0
##   5            0    0    0    0 1786    0    0    0    0            0
##   6            0    0    0    0    0  894    0    0    0            0
##   7            0    0    0    0    0    0 1021    0    0            0
##   8            0    0    0    0    0    0    0 1175    0            0
##   9            0    0    0    0    0    0    0    0  568            0
##   10           0    0    0    0    0    0    0    0    0         1239

Creating index of happiness

wvs.ext$happyIND<- rowMeans(wvs.ext[c('happy0','satisf1')], na.rm=T)                            
summary(wvs.ext$happyIND)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   3.000   4.000   4.262   5.500  10.000      60

hist(wvs.ext$happyIND)

class(wvs.ext$happyIND)

## [1] "numeric"

Can you remember what standardization means and why it necessary?

Standardization is the process of putting differeent variables on the same scale, as in this way it is easier to compare them

wvs.ext$happyINDscale <- scale (wvs.ext$happyIND)
hist(wvs.ext$happyINDscale)

“happyINDscale” is the dependent variable.

MODELING: relationships between the level of happiness, income, and age

model1 <- lm(wvs.ext$happyINDscale~ as.numeric(wvs.ext$age) +
               as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
summary(model1)

## 
## Call:
## lm(formula = wvs.ext$happyINDscale ~ as.numeric(wvs.ext$age) + 
##     as.numeric(wvs.ext$income1) + as.numeric(wvs.ext$choice))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9537 -0.5839  0.0046  0.5627  4.6403 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -0.8561183  0.0383086  -22.35   <2e-16 ***
## as.numeric(wvs.ext$age)     -0.0090692  0.0006403  -14.16   <2e-16 ***
## as.numeric(wvs.ext$income1)  0.0482466  0.0040943   11.78   <2e-16 ***
## as.numeric(wvs.ext$choice)   0.1280611  0.0039149   32.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8773 on 7928 degrees of freedom
##   (2849 observations deleted due to missingness)
## Multiple R-squared:  0.1816, Adjusted R-squared:  0.1813 
## F-statistic: 586.6 on 3 and 7928 DF,  p-value: < 2.2e-16

Interpretation of the results of regression:

p-value estimates show that all the predictors (age,income, choice are significant)

With one unit increase in age, the happiness level decreases by 0,0090692 (The older the person the lower his or her level of happiness)

With one unit increase in income, the happiness level increases by 0.0482466 (The richer the person the happier he or she is)

With one unit increase in choice variable^ the happiness level increases by 0.1280611 (The more freedom the one has, the happier he or she is)

Do you have outliers and leverage?

par(mfrow=c(2,2))
plot(model1)

library(car)

## Loading required package: carData

outlierTest(model1)

##      rstudent unadjusted p-value Bonferroni p
## 9356 5.300615         1.1852e-07   0.00094012
## 4910 5.169721         2.4017e-07   0.00190500
## 4162 5.051331         4.4853e-07   0.00355780
## 980  4.797230         1.6381e-06   0.01299400
## 3625 4.796166         1.6468e-06   0.01306300
## 8979 4.723898         2.3535e-06   0.01866800
## 4287 4.565357         5.0616e-06   0.04014900

qqPlot(model1, main="QQ plot")

## [1] 4910 9356

leveragePlots(model1)

We have two extreme outliers 4910 and 9356

INTERACTION MODELS

Ok, the older the person is, the lower the level of happiness is;

and the richer the person is, the happier hi/she is.

And we can suggest that rich people are happier even in old age.

Let’s test this hypothesis!

wvs.ext$age <- as.numeric(wvs.ext$age)
wvs.ext$income1 <- as.numeric(wvs.ext$income1)
wvs.ext$choice <- as.numeric(wvs.ext$choice)

model2 <- lm(wvs.ext$happyINDscale~ choice + age * income1 , 
             data= wvs.ext)
summary(model2)

## 
## Call:
## lm(formula = wvs.ext$happyINDscale ~ choice + age * income1, 
##     data = wvs.ext)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9269 -0.5843  0.0024  0.5612  4.6413 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.8167208  0.0534746 -15.273  < 2e-16 ***
## choice       0.1278022  0.0039225  32.581  < 2e-16 ***
## age         -0.0104389  0.0014465  -7.217 5.82e-13 ***
## income1      0.0407445  0.0081998   4.969 6.87e-07 ***
## age:income1  0.0002793  0.0002645   1.056    0.291    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8772 on 7927 degrees of freedom
##   (2849 observations deleted due to missingness)
## Multiple R-squared:  0.1818, Adjusted R-squared:  0.1813 
## F-statistic: 440.2 on 4 and 7927 DF,  p-value: < 2.2e-16

library(sjPlot)

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

plot_model(model2, type = "int")

The interactive effect is insignificant, which means that with age the level of happiness decreases equally for both the poor and the rich.

What about the choice?

If older people feel they have completely free choice and control over their lives,are they happier than those who think “no choice at all”?

model3 <- lm(wvs.ext$happyINDscale~ age * choice + income1 , 
             data= wvs.ext)
summary(model3)

## 
## Call:
## lm(formula = wvs.ext$happyINDscale ~ age * choice + income1, 
##     data = wvs.ext)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8555 -0.5720 -0.0009  0.5641  4.6403 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.6454136  0.0570204 -11.319  < 2e-16 ***
## age         -0.0158706  0.0015073 -10.529  < 2e-16 ***
## choice       0.0943615  0.0078117  12.080  < 2e-16 ***
## income1      0.0472345  0.0040932  11.540  < 2e-16 ***
## age:choice   0.0011569  0.0002322   4.983  6.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8759 on 7927 degrees of freedom
##   (2849 observations deleted due to missingness)
## Multiple R-squared:  0.1842, Adjusted R-squared:  0.1838 
## F-statistic: 447.5 on 4 and 7927 DF,  p-value: < 2.2e-16

plot_model(model3, type = "int")

Interpretation of the result:

With age the level of hahappiness for those who have no freedom at all decreases more rapidly, than for those who have a great deal of freedom

Task

Now select one or two variables (from “wvs.ext” dataset), add interactive effect in model2, build a plot and interpret a new result.

model4 <- lm(wvs.ext$happyINDscale~ income1 + choice + age * trust , 
             data= wvs.ext) 
summary(model4)

## 
## Call:
## lm(formula = wvs.ext$happyINDscale ~ income1 + choice + age * 
##     trust, data = wvs.ext)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9372 -0.5815  0.0061  0.5595  4.4663 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -0.824860   0.052183 -15.807  < 2e-16 ***
## income1                        0.048832   0.004143  11.786  < 2e-16 ***
## choice                         0.126242   0.003970  31.801  < 2e-16 ***
## age                           -0.006840   0.001230  -5.561 2.77e-08 ***
## trustCanґt be too careful     -0.027938   0.046373  -0.602    0.547    
## age:trustCanґt be too careful -0.003164   0.001440  -2.197    0.028 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8725 on 7656 degrees of freedom
##   (3119 observations deleted due to missingness)
## Multiple R-squared:  0.1832, Adjusted R-squared:  0.1827 
## F-statistic: 343.5 on 5 and 7656 DF,  p-value: < 2.2e-16

plot_model(model4, type = "int")