Installing and loading the packages needed for the analysis

# Set the session's CRAN repository option to the cloud mirror.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

library(tidyverse) #Tidyverse collection of tidyr, dplyr,readr,ggplot2, etc.
## Warning: package 'ggplot2' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) #readxl to read and import excel files

library(Hmisc)
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(gridExtra) #gridExtra to combine images side-by-side
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(olsrr)
## 
## Attaching package: 'olsrr'
## 
## The following object is masked from 'package:datasets':
## 
##     rivers

Data import and setting the seed for reproducibility

Once the packages needed for the analysis have been loaded, the next step is to import the happiness dataset stored on my laptop. This is done with the code below.

# Fix the random-number seed for reproducibility.
set.seed(seed = 123)
# Load the happiness survey data from CSV.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
data <- read.csv(file = "C:/Users/DELL/Desktop/Naomi_work/Naomi/Happiness.csv")

Preliminary Analysis

Stage One: Data validation and exploratory analysis

Taking a view of the dataset

# Show the first two rows to get a feel for the columns.
head(x = data, n = 2)
##   X YEAR ID_ HHTYPE1 COHORT BALLOT OVERSAMP WRKSTAT MARITAL DIVORCE SPWRKSTA
## 1 1 1972   1      NA   1949   -100        1       1       5      NA       NA
## 2 2 1972  10      NA   1942   -100        1       1       1       2        1
##   CHILDS AGE EDUC DEGREE SEX RACE RES16 REG16 MOBILE16 INCOM16 BORN PARBORN
## 1      0  23   16      3   2    1     5     2        3       3   NA      NA
## 2      4  30   12      1   2    2     3     7        1       1   NA      NA
##   GRANBORN HOMPOP BABIES PRETEEN ADULTS EARNRS INCOME RINCOME INCOME72 INCOME77
## 1       NA      1      0       0      1      1     NA      NA        4       NA
## 2       NA      7      2       2      2      2     NA      NA        4       NA
##   RINCOM77 INCOME82 RINCOM82 INCOME86 RINCOM86 INCOME91 RINCOM91 INCOME98
## 1       NA       NA       NA       NA       NA       NA       NA       NA
## 2       NA       NA       NA       NA       NA       NA       NA       NA
##   RINCOM98 INCOME06 RINCOM06 REGION XNORCSIZ SRCBELT SIZE PARTYID POLVIEWS TAX
## 1       NA       NA       NA      3        3       3   72       2       NA  NA
## 2       NA       NA       NA      7        2       5  127       0       NA  NA
##   RELIG FUND ATTEND RELITEN HAPPY HAPMAR HAPCOHAB HEALTH LIFE HELPFUL CONFINAN
## 1     3    3      2      NA     3     NA       NA      2   NA       2       NA
## 2     1    1      8      NA     2     NA       NA      3   NA       1       NA
##   CONBUS CONCLERG CONEDUC CONFED CONLABOR CONPRESS CONMEDIC CONTV CONJUDGE
## 1     NA       NA      NA     NA       NA       NA       NA    NA       NA
## 2     NA       NA      NA     NA       NA       NA       NA    NA       NA
##   CONSCI CONLEGIS CONARMY SOCREL SOCOMMUN SOCFREND SOCBAR JOBLOSE JOBFIND
## 1     NA       NA      NA     NA       NA       NA     NA      NA      NA
## 2     NA       NA      NA     NA       NA       NA     NA      NA      NA
##   SATJOB SATFIN FINALTER FINRELA UNEMP TVHOURS JOBFIND1 EQINCOME MASEI10
## 1      3      3        1       3    NA      NA       NA       NA      NA
## 2      2      3        3       2    NA      NA       NA       NA      NA
##   PASEI10 PRESTG10 SEI10 SPPRES10 SPSEI10 INCOME16 RINCOM16 MABORN PABORN
## 1      62       45  50.0       NA      NA       NA       NA     NA     NA
## 2      32       25  12.6       25    36.4       NA       NA     NA     NA
##   CLASS_ TEENS WTSSCOMP HappyPartner KidsAtHome JobFind MomBornUS DadBornUS
## 1      3     0   0.4446           NA          0      NA        NA        NA
## 2      2     1   0.8893           NA          5      NA        NA        NA
##   PersIncome FamIncome InflTarget InflNominal PersIncomeAdj FamIncomeAdj WEIGHT
## 1         NA      6996   281.3283       40.35            NA     48777.52 0.4446
## 2         NA      6996   294.4790       39.60            NA     52024.62 0.8893
##   Z.Inc_Pers  Z.Inc_Fam P.Inc_Pers P.Inc_Fam
## 1         NA -0.4570523         NA 0.3031335
## 2         NA -0.4211170         NA 0.3562670

Looking at the dataset structure

# Compact overview of every column's type and first values.
str(object = data)

Filtering for 2018

# Keep only the rows whose YEAR is 2018.
Twenty_18 <- filter(data, YEAR == 2018)
# Narrow those rows down to the outcome (HAPPY) and the candidate predictors.
Twenty_18_variables <- select(
  Twenty_18,
  HAPPY, RACE, MARITAL, SEX, BORN, RELIG, REGION, FamIncomeAdj
)
head(Twenty_18_variables, n = 2)
##   HAPPY RACE MARITAL SEX BORN RELIG REGION FamIncomeAdj
## 1     2    1       5   1    1    11      1           NA
## 2     2    1       1   1    1     1      1     120264.4

Estimating the percentage of missing values for each selected variable

# Percentage of missing (NA) entries in a vector.
#   x: a vector, here one column of Twenty_18_variables.
#   Returns a single number between 0 and 100 (NaN when x has length zero).
# Replaces eight hand-copied versions of the same expression; the arithmetic
# (NA count / length * 100) is unchanged, so the recorded values still apply.
pct_missing <- function(x) {
  sum(is.na(x)) / length(x) * 100
}

pct_missing(Twenty_18_variables$MARITAL)
## [1] 0.08532423
pct_missing(Twenty_18_variables$BORN)
## [1] 0.04266212
pct_missing(Twenty_18_variables$RELIG)
## [1] 0.8959044
pct_missing(Twenty_18_variables$REGION)
## [1] 0
pct_missing(Twenty_18_variables$RACE)
## [1] 0
pct_missing(Twenty_18_variables$HAPPY)
## [1] 0
pct_missing(Twenty_18_variables$FamIncomeAdj)
## [1] 8.233788
pct_missing(Twenty_18_variables$SEX)
## [1] 0

Removing Missing values

# Listwise deletion: drop every row that has an NA in any selected column,
# then print per-column summary statistics of what is left.
Twenty_18_variables <- Twenty_18_variables %>% na.omit()
Twenty_18_variables %>% summary()
##      HAPPY            RACE          MARITAL           SEX       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :2.000  
##  Mean   :1.846   Mean   :1.381   Mean   :2.654   Mean   :1.547  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:5.000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       BORN           RELIG           REGION       FamIncomeAdj     
##  Min.   :1.000   Min.   : 1.00   Min.   :1.000   Min.   :   521.9  
##  1st Qu.:1.000   1st Qu.: 1.00   1st Qu.:3.000   1st Qu.: 32981.3  
##  Median :1.000   Median : 2.00   Median :5.000   Median : 68207.8  
##  Mean   :1.121   Mean   : 2.27   Mean   :5.249   Mean   :118102.6  
##  3rd Qu.:1.000   3rd Qu.: 4.00   3rd Qu.:7.000   3rd Qu.:124014.3  
##  Max.   :2.000   Max.   :13.00   Max.   :9.000   Max.   :668794.3

Visualizations

# Happiness counts split by sex: side-by-side bars with the count printed
# above each bar.
# SEX is stored as the codes 1/2 (the script later treats 1 as male and 2 as
# female), so it is relabelled before plotting. The manual fill palette is
# keyed on "male"/"female"; with the raw codes it matched no level and ggplot2
# warned "No shared levels found".
ggplot(
  Twenty_18_variables,
  aes(
    x = HAPPY,
    fill = factor(SEX, levels = c(1, 2), labels = c("male", "female")),
    color = factor(SEX, levels = c(1, 2), labels = c("male", "female"))
  )
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) +
  labs(x = "Happiness", y = "Count", fill = "Sex", color = "Sex") +
  theme_minimal()

# Happiness counts split by RACE code: side-by-side bars with the count
# printed above each bar.
# The manual "male"/"female" palette that used to be here belongs to the SEX
# plot; it matched none of the RACE levels and only produced a
# "No shared levels found" warning, so the default discrete palette is used.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(RACE), color = factor(RACE))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  labs(x = "Happiness", y = "Count", fill = "RACE", color = "RACE") +
  theme_minimal()

# Happiness counts split by BORN code: side-by-side bars with the count
# printed above each bar.
# The manual "male"/"female" palette that used to be here belongs to the SEX
# plot; it matched none of the BORN levels and only produced a
# "No shared levels found" warning, so the default discrete palette is used.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(BORN), color = factor(BORN))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  labs(x = "Happiness", y = "Count", fill = "BORN", color = "BORN") +
  theme_minimal()

# Happiness counts split by RELIG code: side-by-side bars with the count
# printed above each bar.
# The manual "male"/"female" palette that used to be here belongs to the SEX
# plot; it matched none of the RELIG levels and only produced a
# "No shared levels found" warning, so the default discrete palette is used.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(RELIG), color = factor(RELIG))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  labs(x = "Happiness", y = "Count", fill = "RELIG", color = "RELIG") +
  theme_minimal()

# Happiness counts split by REGION code: side-by-side bars with the count
# printed above each bar.
# The manual "male"/"female" palette that used to be here belongs to the SEX
# plot; it matched none of the REGION levels and only produced a
# "No shared levels found" warning, so the default discrete palette is used.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(REGION), color = factor(REGION))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  labs(x = "Happiness", y = "Count", fill = "REGION", color = "REGION") +
  theme_minimal()

Stage Two: Data Stratifying

# Split the cleaned 2018 data by the SEX code: 1 -> Male, 2 -> Female.
Male <- filter(Twenty_18_variables, SEX == 1)
Female <- filter(Twenty_18_variables, SEX == 2)

Stage Three: Relationship Analysis

# Pairwise correlation matrix, with sample size and p-values, across all
# selected variables (both sexes together).
Twenty_18_variables %>%
  as.matrix() %>%
  rcorr()
##              HAPPY  RACE MARITAL   SEX  BORN RELIG REGION FamIncomeAdj
## HAPPY         1.00  0.04    0.25  0.00  0.00  0.04  -0.02        -0.19
## RACE          0.04  1.00    0.13  0.01  0.34  0.06   0.14        -0.13
## MARITAL       0.25  0.13    1.00 -0.02 -0.10  0.10  -0.02        -0.25
## SEX           0.00  0.01   -0.02  1.00  0.01 -0.04   0.01        -0.05
## BORN          0.00  0.34   -0.10  0.01  1.00  0.07   0.11        -0.04
## RELIG         0.04  0.06    0.10 -0.04  0.07  1.00   0.01         0.02
## REGION       -0.02  0.14   -0.02  0.01  0.11  0.01   1.00         0.01
## FamIncomeAdj -0.19 -0.13   -0.25 -0.05 -0.04  0.02   0.01         1.00
## 
## n= 2136 
## 
## 
## P
##              HAPPY  RACE   MARITAL SEX    BORN   RELIG  REGION FamIncomeAdj
## HAPPY               0.0386 0.0000  0.8644 0.8393 0.0605 0.4256 0.0000      
## RACE         0.0386        0.0000  0.6565 0.0000 0.0061 0.0000 0.0000      
## MARITAL      0.0000 0.0000         0.2914 0.0000 0.0000 0.3292 0.0000      
## SEX          0.8644 0.6565 0.2914         0.6534 0.0684 0.5588 0.0350      
## BORN         0.8393 0.0000 0.0000  0.6534        0.0012 0.0000 0.0730      
## RELIG        0.0605 0.0061 0.0000  0.0684 0.0012        0.7535 0.4144      
## REGION       0.4256 0.0000 0.3292  0.5588 0.0000 0.7535        0.5034      
## FamIncomeAdj 0.0000 0.0000 0.0000  0.0350 0.0730 0.4144 0.5034
# Scatterplot matrix of every variable pair, drawn in red, once per sex.
pairs(x = Male, col = "red")

pairs(x = Female, col = "red")

Male happiness depends on (or is negatively or positively influenced by) marital status, race, BORN, religion, region, and family income

# One scatter plot of HAPPY against each candidate predictor, male rows only.
ggplot(data = Male, mapping = aes(x = MARITAL, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = RACE, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = BORN, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = RELIG, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = REGION, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = FamIncomeAdj, y = HAPPY)) +
  geom_point()
# Pearson correlation test: MARITAL vs HAPPY, male rows.
cor.test(x = Male$MARITAL, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$MARITAL and Male$HAPPY
## t = 9.0868, df = 966, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2215239 0.3376565
## sample estimates:
##       cor 
## 0.2806169
# Pearson correlation test: RACE vs HAPPY, male rows.
cor.test(x = Male$RACE, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$RACE and Male$HAPPY
## t = 0.86505, df = 966, p-value = 0.3872
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03525007  0.09067254
## sample estimates:
##        cor 
## 0.02782161
# Pearson correlation test: BORN vs HAPPY, male rows.
cor.test(x = Male$BORN, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$BORN and Male$HAPPY
## t = 0.50412, df = 966, p-value = 0.6143
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04684003  0.07914672
## sample estimates:
##        cor 
## 0.01621772
# Pearson correlation test: RELIG vs HAPPY, male rows.
cor.test(x = Male$RELIG, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$RELIG and Male$HAPPY
## t = 1.6885, df = 966, p-value = 0.09163
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.008792515  0.116857875
## sample estimates:
##        cor 
## 0.05424742
# Pearson correlation test: REGION vs HAPPY, male rows.
cor.test(x = Male$REGION, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$REGION and Male$HAPPY
## t = 0.37955, df = 966, p-value = 0.7044
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05083809  0.07516297
## sample estimates:
##        cor 
## 0.01221091
# Pearson correlation test: FamIncomeAdj vs HAPPY, male rows.
cor.test(x = Male$FamIncomeAdj, y = Male$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Male$FamIncomeAdj and Male$HAPPY
## t = -5.638, df = 966, p-value = 2.258e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2388103 -0.1167898
## sample estimates:
##        cor 
## -0.1784862

Female happiness depends on (or is negatively or positively influenced by) marital status, race, BORN, religion, region, and family income

# One scatter plot of HAPPY against each candidate predictor, female rows only.
ggplot(data = Female, mapping = aes(x = MARITAL, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = RACE, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = BORN, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = RELIG, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = REGION, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = FamIncomeAdj, y = HAPPY)) +
  geom_point()
# Pearson correlation test: MARITAL vs HAPPY, female rows.
cor.test(x = Female$MARITAL, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$MARITAL and Female$HAPPY
## t = 7.9588, df = 1166, p-value = 4.092e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1718699 0.2806971
## sample estimates:
##      cor 
## 0.226992
# Pearson correlation test: RACE vs HAPPY, female rows.
cor.test(x = Female$RACE, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$RACE and Female$HAPPY
## t = 2.0304, df = 1166, p-value = 0.04254
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.002002704 0.116319597
## sample estimates:
##        cor 
## 0.05935575
# Pearson correlation test: BORN vs HAPPY, female rows.
cor.test(x = Female$BORN, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$BORN and Female$HAPPY
## t = -0.72736, df = 1166, p-value = 0.4672
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07856009  0.03610779
## sample estimates:
##         cor 
## -0.02129618
# Pearson correlation test: RELIG vs HAPPY, female rows.
cor.test(x = Female$RELIG, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$RELIG and Female$HAPPY
## t = 1.0253, df = 1166, p-value = 0.3055
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02739553  0.08722122
## sample estimates:
##        cor 
## 0.03001149
# Pearson correlation test: REGION vs HAPPY, female rows.
cor.test(x = Female$REGION, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$REGION and Female$HAPPY
## t = -1.4206, df = 1166, p-value = 0.1557
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09869195  0.01583022
## sample estimates:
##         cor 
## -0.04156739
# Pearson correlation test: FamIncomeAdj vs HAPPY, female rows.
cor.test(x = Female$FamIncomeAdj, y = Female$HAPPY)
## 
##  Pearson's product-moment correlation
## 
## data:  Female$FamIncomeAdj and Female$HAPPY
## t = -7.3379, df = 1166, p-value = 4.055e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2642721 -0.1546003
## sample estimates:
##       cor 
## -0.210097

Stage Four: Regression Analysis

For Male

# Ordinary least-squares fit of HAPPY on the six predictors, male rows only.
# NOTE(review): MARITAL, RACE, BORN, RELIG and REGION enter as numeric codes;
# confirm that treating them as continuous is intended.
model_M <- lm(
  formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + FamIncomeAdj,
  data = Male
)
summary(model_M)
## 
## Call:
## lm(formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + 
##     FamIncomeAdj, data = Male)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2254 -0.6536  0.0093  0.3144  1.5642 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.547e+00  9.425e-02  16.419  < 2e-16 ***
## MARITAL       9.576e-02  1.221e-02   7.846 1.14e-14 ***
## RACE         -3.238e-02  3.244e-02  -0.998 0.318370    
## BORN          9.273e-02  6.745e-02   1.375 0.169521    
## RELIG         8.629e-03  1.190e-02   0.725 0.468506    
## REGION        4.215e-03  8.487e-03   0.497 0.619540    
## FamIncomeAdj -4.658e-07  1.237e-07  -3.765 0.000177 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6252 on 961 degrees of freedom
## Multiple R-squared:  0.09419,    Adjusted R-squared:  0.08853 
## F-statistic: 16.65 on 6 and 961 DF,  p-value: < 2.2e-16
# Fitted happiness scores from the male model, placed next to the observed
# HAPPY values of the same male rows; show the first five pairs.
k1 <- predict(object = model_M, newdata = Male)
k2 <- data.frame(k1)
k <- cbind(k2, Male$HAPPY)
head(k, n = 5)
##         k1 Male$HAPPY
## 1 1.660342          2
## 2 1.710367          2
## 3 1.666686          2
## 4 1.923914          2
## 5 1.825159          3

Model Evaluation: Graphical Method

# Tolerance and variance inflation factor (VIF) for each predictor of model_M.
ols_vif_tol(model_M)
##      Variables Tolerance      VIF
## 1      MARITAL 0.8952999 1.116944
## 2         RACE 0.8318336 1.202164
## 3         BORN 0.8541626 1.170737
## 4        RELIG 0.9620493 1.039448
## 5       REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
# Analysis-of-variance table for the terms of model_M.
anova(model_M)
## Analysis of Variance Table
## 
## Response: HAPPY
##               Df Sum Sq Mean Sq F value    Pr(>F)    
## MARITAL        1  32.65  32.654 83.5434 < 2.2e-16 ***
## RACE           1   0.00   0.001  0.0036 0.9518563    
## BORN           1   0.72   0.723  1.8488 0.1742436    
## RELIG          1   0.06   0.062  0.1585 0.6905912    
## REGION         1   0.08   0.077  0.1978 0.6566060    
## FamIncomeAdj   1   5.54   5.540 14.1728 0.0001769 ***
## Residuals    961 375.62   0.391                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eigenvalues and condition indices for model_M, with one column per
# coefficient.
ols_eigen_cindex(model_M)
##   Eigenvalue Condition Index    intercept     MARITAL        RACE         BORN
## 1 5.49345159        1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252        2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050        4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729        4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146        5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381        7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283       13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
##          RELIG       REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Combined collinearity report: the tolerance/VIF table followed by the
# eigenvalue/condition-index table (the same numbers as the two separate
# calls earlier in this section).
ols_coll_diag(model_M)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##      Variables Tolerance      VIF
## 1      MARITAL 0.8952999 1.116944
## 2         RACE 0.8318336 1.202164
## 3         BORN 0.8541626 1.170737
## 4        RELIG 0.9620493 1.039448
## 5       REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##   Eigenvalue Condition Index    intercept     MARITAL        RACE         BORN
## 1 5.49345159        1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252        2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050        4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729        4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146        5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381        7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283       13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
##          RELIG       REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Residual/fit spread plot for model_M (the figure is not part of this text
# output).
ols_plot_resid_fit_spread(model_M)

# Zero-order, partial and part correlations for each predictor in model_M.
ols_correlations(model_M)
##                  Correlations                   
## -----------------------------------------------
## Variable        Zero Order    Partial     Part  
## -----------------------------------------------
## MARITAL              0.281      0.245     0.241 
## RACE                 0.028     -0.032    -0.031 
## BORN                 0.016      0.044     0.042 
## RELIG                0.054      0.023     0.022 
## REGION               0.012      0.016     0.015 
## FamIncomeAdj        -0.178     -0.121    -0.116 
## -----------------------------------------------
# Observed-versus-fitted plot for model_M. The geom_segment warning recorded
# below comes from the plotting call itself; this script never calls
# geom_segment directly.
ols_plot_obs_fit(model_M)
## Warning in geom_segment(data = d, aes(x = min(x), y = min(y), xend = max(x), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?

# Diagnostic plots for model_M (figures are not part of this text output).
ols_plot_diagnostics(model_M)
## Warning in geom_segment(aes(x = min(x), y = min(y), xend = max(x), yend = max(y)), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?

# Added-variable plots for model_M. Six geom_smooth() messages are recorded
# below, presumably one per predictor.
ols_plot_added_variable(model_M)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Component-plus-residual plots for model_M. Six geom_smooth() messages are
# recorded below, presumably one per predictor.
ols_plot_comp_plus_resid(model_M)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

For Female

# Ordinary least-squares fit of HAPPY on the six predictors, female rows only.
# The call previously passed data = Male, which just re-fitted the male model.
# NOTE: the output recorded below this call was produced by that Male fit
# (its Call line shows data = Male) and must be regenerated.
# NOTE(review): MARITAL, RACE, BORN, RELIG and REGION enter as numeric codes;
# confirm that treating them as continuous is intended.
model_F <- lm(
  HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + FamIncomeAdj,
  data = Female
)
summary(model_F)
## 
## Call:
## lm(formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + 
##     FamIncomeAdj, data = Male)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2254 -0.6536  0.0093  0.3144  1.5642 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.547e+00  9.425e-02  16.419  < 2e-16 ***
## MARITAL       9.576e-02  1.221e-02   7.846 1.14e-14 ***
## RACE         -3.238e-02  3.244e-02  -0.998 0.318370    
## BORN          9.273e-02  6.745e-02   1.375 0.169521    
## RELIG         8.629e-03  1.190e-02   0.725 0.468506    
## REGION        4.215e-03  8.487e-03   0.497 0.619540    
## FamIncomeAdj -4.658e-07  1.237e-07  -3.765 0.000177 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6252 on 961 degrees of freedom
## Multiple R-squared:  0.09419,    Adjusted R-squared:  0.08853 
## F-statistic: 16.65 on 6 and 961 DF,  p-value: < 2.2e-16
# Fitted happiness scores from model_F for the female rows, placed next to the
# observed HAPPY values of those same rows; show the first five pairs.
# The code previously used model_M and the Male data, which duplicated the
# male predictions.
# NOTE: the output recorded below came from that earlier call and must be
# regenerated.
Fk1 <- predict(model_F, newdata = Female)
Fk2 <- data.frame(Fk1)
Fk <- cbind(Fk2, Female$HAPPY)
head(Fk, 5)
##        Fk1 Male$HAPPY
## 1 1.660342          2
## 2 1.710367          2
## 3 1.666686          2
## 4 1.923914          2
## 5 1.825159          3

Model evaluation for female: Graphical Method

# Tolerance and variance inflation factor (VIF) for each predictor of model_F.
# NOTE(review): every number recorded in this section for model_F is identical
# to the model_M output, consistent with model_F having been fitted on `Male`
# when this was run; regenerate once model_F is fitted on `Female`.
ols_vif_tol(model_F)
##      Variables Tolerance      VIF
## 1      MARITAL 0.8952999 1.116944
## 2         RACE 0.8318336 1.202164
## 3         BORN 0.8541626 1.170737
## 4        RELIG 0.9620493 1.039448
## 5       REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
# Analysis-of-variance table for the terms of model_F.
anova(model_F)
## Analysis of Variance Table
## 
## Response: HAPPY
##               Df Sum Sq Mean Sq F value    Pr(>F)    
## MARITAL        1  32.65  32.654 83.5434 < 2.2e-16 ***
## RACE           1   0.00   0.001  0.0036 0.9518563    
## BORN           1   0.72   0.723  1.8488 0.1742436    
## RELIG          1   0.06   0.062  0.1585 0.6905912    
## REGION         1   0.08   0.077  0.1978 0.6566060    
## FamIncomeAdj   1   5.54   5.540 14.1728 0.0001769 ***
## Residuals    961 375.62   0.391                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eigenvalues and condition indices for model_F, with one column per
# coefficient.
ols_eigen_cindex(model_F)
##   Eigenvalue Condition Index    intercept     MARITAL        RACE         BORN
## 1 5.49345159        1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252        2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050        4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729        4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146        5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381        7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283       13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
##          RELIG       REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Combined collinearity report: the tolerance/VIF table followed by the
# eigenvalue/condition-index table (the same numbers as the two separate
# calls earlier in this section).
ols_coll_diag(model_F)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
##      Variables Tolerance      VIF
## 1      MARITAL 0.8952999 1.116944
## 2         RACE 0.8318336 1.202164
## 3         BORN 0.8541626 1.170737
## 4        RELIG 0.9620493 1.039448
## 5       REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
## 
## 
## Eigenvalue and Condition Index
## ------------------------------
##   Eigenvalue Condition Index    intercept     MARITAL        RACE         BORN
## 1 5.49345159        1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252        2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050        4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729        4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146        5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381        7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283       13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
##          RELIG       REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Residual/fit spread plot for model_F (the figure is not part of this text
# output).
ols_plot_resid_fit_spread(model_F)

# Zero-order, partial and part correlations for each predictor in model_F.
ols_correlations(model_F)
##                  Correlations                   
## -----------------------------------------------
## Variable        Zero Order    Partial     Part  
## -----------------------------------------------
## MARITAL              0.281      0.245     0.241 
## RACE                 0.028     -0.032    -0.031 
## BORN                 0.016      0.044     0.042 
## RELIG                0.054      0.023     0.022 
## REGION               0.012      0.016     0.015 
## FamIncomeAdj        -0.178     -0.121    -0.116 
## -----------------------------------------------
# Observed-versus-fitted plot for model_F. The geom_segment warning recorded
# below comes from the plotting call itself; this script never calls
# geom_segment directly.
ols_plot_obs_fit(model_F)
## Warning in geom_segment(data = d, aes(x = min(x), y = min(y), xend = max(x), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?

# Diagnostic plots for model_F (figures are not part of this text output).
ols_plot_diagnostics(model_F)
## Warning in geom_segment(aes(x = min(x), y = min(y), xend = max(x), yend = max(y)), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?

# Added-variable plots for model_F. Six geom_smooth() messages are recorded
# below, presumably one per predictor.
ols_plot_added_variable(model_F)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Component-plus-residual plots for model_F. Six geom_smooth() messages are
# recorded below, presumably one per predictor.
ols_plot_comp_plus_resid(model_F)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'