options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(tidyverse) #Tidyverse collection of tidyr, dplyr,readr,ggplot2, etc.
## Warning: package 'ggplot2' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) #readxl to read and import excel files
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(gridExtra) #gridExtra to combine images side-by-side
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(olsrr)
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
After the packages needed for the analysis have been loaded, the next step is to import the happiness dataset stored on my laptop. This is done with the code below.
# Read the happiness survey extract from its CSV file into `data`.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
data <- read.csv(file = "C:/Users/DELL/Desktop/Naomi_work/Naomi/Happiness.csv")
# Fix the random number generator state so any later random step is repeatable.
set.seed(seed = 123)
# Show the first two rows as a quick check of the import.
head(x = data, n = 2)
## X YEAR ID_ HHTYPE1 COHORT BALLOT OVERSAMP WRKSTAT MARITAL DIVORCE SPWRKSTA
## 1 1 1972 1 NA 1949 -100 1 1 5 NA NA
## 2 2 1972 10 NA 1942 -100 1 1 1 2 1
## CHILDS AGE EDUC DEGREE SEX RACE RES16 REG16 MOBILE16 INCOM16 BORN PARBORN
## 1 0 23 16 3 2 1 5 2 3 3 NA NA
## 2 4 30 12 1 2 2 3 7 1 1 NA NA
## GRANBORN HOMPOP BABIES PRETEEN ADULTS EARNRS INCOME RINCOME INCOME72 INCOME77
## 1 NA 1 0 0 1 1 NA NA 4 NA
## 2 NA 7 2 2 2 2 NA NA 4 NA
## RINCOM77 INCOME82 RINCOM82 INCOME86 RINCOM86 INCOME91 RINCOM91 INCOME98
## 1 NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA
## RINCOM98 INCOME06 RINCOM06 REGION XNORCSIZ SRCBELT SIZE PARTYID POLVIEWS TAX
## 1 NA NA NA 3 3 3 72 2 NA NA
## 2 NA NA NA 7 2 5 127 0 NA NA
## RELIG FUND ATTEND RELITEN HAPPY HAPMAR HAPCOHAB HEALTH LIFE HELPFUL CONFINAN
## 1 3 3 2 NA 3 NA NA 2 NA 2 NA
## 2 1 1 8 NA 2 NA NA 3 NA 1 NA
## CONBUS CONCLERG CONEDUC CONFED CONLABOR CONPRESS CONMEDIC CONTV CONJUDGE
## 1 NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA
## CONSCI CONLEGIS CONARMY SOCREL SOCOMMUN SOCFREND SOCBAR JOBLOSE JOBFIND
## 1 NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA
## SATJOB SATFIN FINALTER FINRELA UNEMP TVHOURS JOBFIND1 EQINCOME MASEI10
## 1 3 3 1 3 NA NA NA NA NA
## 2 2 3 3 2 NA NA NA NA NA
## PASEI10 PRESTG10 SEI10 SPPRES10 SPSEI10 INCOME16 RINCOM16 MABORN PABORN
## 1 62 45 50.0 NA NA NA NA NA NA
## 2 32 25 12.6 25 36.4 NA NA NA NA
## CLASS_ TEENS WTSSCOMP HappyPartner KidsAtHome JobFind MomBornUS DadBornUS
## 1 3 0 0.4446 NA 0 NA NA NA
## 2 2 1 0.8893 NA 5 NA NA NA
## PersIncome FamIncome InflTarget InflNominal PersIncomeAdj FamIncomeAdj WEIGHT
## 1 NA 6996 281.3283 40.35 NA 48777.52 0.4446
## 2 NA 6996 294.4790 39.60 NA 52024.62 0.8893
## Z.Inc_Pers Z.Inc_Fam P.Inc_Pers P.Inc_Fam
## 1 NA -0.4570523 NA 0.3031335
## 2 NA -0.4211170 NA 0.3562670
# Inspect the column types of the full dataset.
utils::str(data)
# Keep only the rows whose survey year is 2018.
Twenty_18 <- dplyr::filter(data, YEAR == 2018)
# Narrow the 2018 rows to the outcome (HAPPY) and the candidate predictors.
Twenty_18_variables <- dplyr::select(
  Twenty_18,
  HAPPY, RACE, MARITAL, SEX, BORN, RELIG, REGION, FamIncomeAdj
)
# Show the first two rows of the reduced table.
head(x = Twenty_18_variables, n = 2)
## HAPPY RACE MARITAL SEX BORN RELIG REGION FamIncomeAdj
## 1 2 1 5 1 1 11 1 NA
## 2 2 1 1 1 1 1 1 120264.4
# Percentage of missing (NA) entries in a single column.
pct_missing <- function(column) {
  (sum(is.na(column)) / length(column)) * 100
}
# Missing-value percentage for each selected 2018 variable.
pct_missing(Twenty_18_variables$MARITAL)
## [1] 0.08532423
pct_missing(Twenty_18_variables$BORN)
## [1] 0.04266212
pct_missing(Twenty_18_variables$RELIG)
## [1] 0.8959044
pct_missing(Twenty_18_variables$REGION)
## [1] 0
pct_missing(Twenty_18_variables$RACE)
## [1] 0
pct_missing(Twenty_18_variables$HAPPY)
## [1] 0
pct_missing(Twenty_18_variables$FamIncomeAdj)
## [1] 8.233788
pct_missing(Twenty_18_variables$SEX)
## [1] 0
# Listwise deletion: drop every row that has an NA in any selected column.
Twenty_18_variables <- Twenty_18_variables %>% na.omit()
# Per-column summary statistics of the complete cases.
Twenty_18_variables %>% summary()
## HAPPY RACE MARITAL SEX
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :1.000 Median :2.000 Median :2.000
## Mean :1.846 Mean :1.381 Mean :2.654 Mean :1.547
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:5.000 3rd Qu.:2.000
## Max. :3.000 Max. :3.000 Max. :5.000 Max. :2.000
## BORN RELIG REGION FamIncomeAdj
## Min. :1.000 Min. : 1.00 Min. :1.000 Min. : 521.9
## 1st Qu.:1.000 1st Qu.: 1.00 1st Qu.:3.000 1st Qu.: 32981.3
## Median :1.000 Median : 2.00 Median :5.000 Median : 68207.8
## Mean :1.121 Mean : 2.27 Mean :5.249 Mean :118102.6
## 3rd Qu.:1.000 3rd Qu.: 4.00 3rd Qu.:7.000 3rd Qu.:124014.3
## Max. :2.000 Max. :13.00 Max. :9.000 Max. :668794.3
# Happiness counts by sex: dodged bars with the count printed above each bar.
# SEX is stored as numeric codes, so it is recoded to a labelled factor first.
# Without this, the "male"/"female" names of the manual fill scale match no
# level in the data ("1"/"2"), the intended colours are never applied, and
# ggplot2 warns "No shared levels found".
# Code 1 = male and 2 = female, following the Male (SEX == 1) and
# Female (SEX == 2) subsets this script builds for the subgroup analysis.
Twenty_18_variables %>%
  mutate(
    sex_label = factor(SEX, levels = c(1, 2), labels = c("male", "female"))
  ) %>%
  ggplot(aes(x = HAPPY, fill = sex_label, color = sex_label)) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  scale_fill_manual(values = c("male" = "blue", "female" = "pink")) +
  # Same title on both legends so fill and outline share one legend.
  labs(x = "Happiness", y = "Count", fill = "Sex", color = "Sex") +
  theme_minimal()
# Happiness counts by RACE code: dodged bars with the count above each bar.
# A manual fill scale keyed on "male"/"female" cannot match any RACE level
# (it left the bars uncoloured and raised "No shared levels found" warnings),
# so ggplot2's default discrete palette is used instead.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(RACE), color = factor(RACE))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  # Same title on both legends so fill and outline share one legend.
  labs(x = "Happiness", y = "Count", fill = "RACE", color = "RACE") +
  theme_minimal()
# Happiness counts by BORN code: dodged bars with the count above each bar.
# A manual fill scale keyed on "male"/"female" cannot match any BORN level
# (it left the bars uncoloured and raised "No shared levels found" warnings),
# so ggplot2's default discrete palette is used instead.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(BORN), color = factor(BORN))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  # Same title on both legends so fill and outline share one legend.
  labs(x = "Happiness", y = "Count", fill = "BORN", color = "BORN") +
  theme_minimal()
# Happiness counts by RELIG code: dodged bars with the count above each bar.
# A manual fill scale keyed on "male"/"female" cannot match any RELIG level
# (it left the bars uncoloured and raised "No shared levels found" warnings),
# so ggplot2's default discrete palette is used instead.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(RELIG), color = factor(RELIG))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  # Same title on both legends so fill and outline share one legend.
  labs(x = "Happiness", y = "Count", fill = "RELIG", color = "RELIG") +
  theme_minimal()
# Happiness counts by REGION code: dodged bars with the count above each bar.
# A manual fill scale keyed on "male"/"female" cannot match any REGION level
# (it left the bars uncoloured and raised "No shared levels found" warnings),
# so ggplot2's default discrete palette is used instead.
ggplot(
  Twenty_18_variables,
  aes(x = HAPPY, fill = factor(REGION), color = factor(REGION))
) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5, # constant offset, so it is set outside aes()
    position = position_dodge(0.9)
  ) +
  # Same title on both legends so fill and outline share one legend.
  labs(x = "Happiness", y = "Count", fill = "REGION", color = "REGION") +
  theme_minimal()
# Split the complete 2018 cases by SEX code: 1 goes to Male, 2 to Female.
Male <- dplyr::filter(Twenty_18_variables, SEX == 1)
Female <- dplyr::filter(Twenty_18_variables, SEX == 2)
# Pairwise correlation matrix, with n and p-values, across all selected columns.
# NOTE(review): several columns are coded categories; confirm that treating
# the codes as numeric is intended.
Hmisc::rcorr(as.matrix(Twenty_18_variables))
## HAPPY RACE MARITAL SEX BORN RELIG REGION FamIncomeAdj
## HAPPY 1.00 0.04 0.25 0.00 0.00 0.04 -0.02 -0.19
## RACE 0.04 1.00 0.13 0.01 0.34 0.06 0.14 -0.13
## MARITAL 0.25 0.13 1.00 -0.02 -0.10 0.10 -0.02 -0.25
## SEX 0.00 0.01 -0.02 1.00 0.01 -0.04 0.01 -0.05
## BORN 0.00 0.34 -0.10 0.01 1.00 0.07 0.11 -0.04
## RELIG 0.04 0.06 0.10 -0.04 0.07 1.00 0.01 0.02
## REGION -0.02 0.14 -0.02 0.01 0.11 0.01 1.00 0.01
## FamIncomeAdj -0.19 -0.13 -0.25 -0.05 -0.04 0.02 0.01 1.00
##
## n= 2136
##
##
## P
## HAPPY RACE MARITAL SEX BORN RELIG REGION FamIncomeAdj
## HAPPY 0.0386 0.0000 0.8644 0.8393 0.0605 0.4256 0.0000
## RACE 0.0386 0.0000 0.6565 0.0000 0.0061 0.0000 0.0000
## MARITAL 0.0000 0.0000 0.2914 0.0000 0.0000 0.3292 0.0000
## SEX 0.8644 0.6565 0.2914 0.6534 0.0684 0.5588 0.0350
## BORN 0.8393 0.0000 0.0000 0.6534 0.0012 0.0000 0.0730
## RELIG 0.0605 0.0061 0.0000 0.0684 0.0012 0.7535 0.4144
## REGION 0.4256 0.0000 0.3292 0.5588 0.0000 0.7535 0.5034
## FamIncomeAdj 0.0000 0.0000 0.0000 0.0350 0.0730 0.4144 0.5034
# Scatterplot matrices of every variable pair, one per subgroup.
graphics::pairs(Male, col = "red")
graphics::pairs(Female, col = "red")
# Male subset: HAPPY plotted against each candidate predictor.
ggplot(data = Male, mapping = aes(x = MARITAL, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = RACE, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = BORN, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = RELIG, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = REGION, y = HAPPY)) +
  geom_point()
ggplot(data = Male, mapping = aes(x = FamIncomeAdj, y = HAPPY)) +
  geom_point()
# Pearson correlation test of MARITAL against HAPPY within the Male subset.
stats::cor.test(x = Male$MARITAL, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$MARITAL and Male$HAPPY
## t = 9.0868, df = 966, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2215239 0.3376565
## sample estimates:
## cor
## 0.2806169
# Pearson correlation test of RACE against HAPPY within the Male subset.
stats::cor.test(x = Male$RACE, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$RACE and Male$HAPPY
## t = 0.86505, df = 966, p-value = 0.3872
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03525007 0.09067254
## sample estimates:
## cor
## 0.02782161
# Pearson correlation test of BORN against HAPPY within the Male subset.
stats::cor.test(x = Male$BORN, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$BORN and Male$HAPPY
## t = 0.50412, df = 966, p-value = 0.6143
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.04684003 0.07914672
## sample estimates:
## cor
## 0.01621772
# Pearson correlation test of RELIG against HAPPY within the Male subset.
stats::cor.test(x = Male$RELIG, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$RELIG and Male$HAPPY
## t = 1.6885, df = 966, p-value = 0.09163
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.008792515 0.116857875
## sample estimates:
## cor
## 0.05424742
# Pearson correlation test of REGION against HAPPY within the Male subset.
stats::cor.test(x = Male$REGION, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$REGION and Male$HAPPY
## t = 0.37955, df = 966, p-value = 0.7044
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05083809 0.07516297
## sample estimates:
## cor
## 0.01221091
# Pearson correlation test of FamIncomeAdj against HAPPY within the Male subset.
stats::cor.test(x = Male$FamIncomeAdj, y = Male$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Male$FamIncomeAdj and Male$HAPPY
## t = -5.638, df = 966, p-value = 2.258e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2388103 -0.1167898
## sample estimates:
## cor
## -0.1784862
# Female subset: HAPPY plotted against each candidate predictor.
ggplot(data = Female, mapping = aes(x = MARITAL, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = RACE, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = BORN, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = RELIG, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = REGION, y = HAPPY)) +
  geom_point()
ggplot(data = Female, mapping = aes(x = FamIncomeAdj, y = HAPPY)) +
  geom_point()
# Pearson correlation test of MARITAL against HAPPY within the Female subset.
stats::cor.test(x = Female$MARITAL, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$MARITAL and Female$HAPPY
## t = 7.9588, df = 1166, p-value = 4.092e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1718699 0.2806971
## sample estimates:
## cor
## 0.226992
# Pearson correlation test of RACE against HAPPY within the Female subset.
stats::cor.test(x = Female$RACE, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$RACE and Female$HAPPY
## t = 2.0304, df = 1166, p-value = 0.04254
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.002002704 0.116319597
## sample estimates:
## cor
## 0.05935575
# Pearson correlation test of BORN against HAPPY within the Female subset.
stats::cor.test(x = Female$BORN, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$BORN and Female$HAPPY
## t = -0.72736, df = 1166, p-value = 0.4672
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07856009 0.03610779
## sample estimates:
## cor
## -0.02129618
# Pearson correlation test of RELIG against HAPPY within the Female subset.
stats::cor.test(x = Female$RELIG, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$RELIG and Female$HAPPY
## t = 1.0253, df = 1166, p-value = 0.3055
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02739553 0.08722122
## sample estimates:
## cor
## 0.03001149
# Pearson correlation test of REGION against HAPPY within the Female subset.
stats::cor.test(x = Female$REGION, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$REGION and Female$HAPPY
## t = -1.4206, df = 1166, p-value = 0.1557
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09869195 0.01583022
## sample estimates:
## cor
## -0.04156739
# Pearson correlation test of FamIncomeAdj against HAPPY within the Female subset.
stats::cor.test(x = Female$FamIncomeAdj, y = Female$HAPPY)
##
## Pearson's product-moment correlation
##
## data: Female$FamIncomeAdj and Female$HAPPY
## t = -7.3379, df = 1166, p-value = 4.055e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2642721 -0.1546003
## sample estimates:
## cor
## -0.210097
# Linear regression of HAPPY on the six candidate predictors, Male subset only.
model_M <- lm(
  formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + FamIncomeAdj,
  data = Male
)
# Coefficient table, residual summary and fit statistics.
summary(object = model_M)
##
## Call:
## lm(formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION +
## FamIncomeAdj, data = Male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2254 -0.6536 0.0093 0.3144 1.5642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.547e+00 9.425e-02 16.419 < 2e-16 ***
## MARITAL 9.576e-02 1.221e-02 7.846 1.14e-14 ***
## RACE -3.238e-02 3.244e-02 -0.998 0.318370
## BORN 9.273e-02 6.745e-02 1.375 0.169521
## RELIG 8.629e-03 1.190e-02 0.725 0.468506
## REGION 4.215e-03 8.487e-03 0.497 0.619540
## FamIncomeAdj -4.658e-07 1.237e-07 -3.765 0.000177 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6252 on 961 degrees of freedom
## Multiple R-squared: 0.09419, Adjusted R-squared: 0.08853
## F-statistic: 16.65 on 6 and 961 DF, p-value: < 2.2e-16
# Fitted HAPPY values from model_M for the same rows it was trained on.
k1 <- predict(object = model_M, newdata = Male)
# Wrap the predictions in a data frame so they can sit next to the observed
# values.
k2 <- data.frame(k1)
k <- cbind(k2, Male$HAPPY)
# First five predicted/observed pairs.
head(x = k, n = 5)
## k1 Male$HAPPY
## 1 1.660342 2
## 2 1.710367 2
## 3 1.666686 2
## 4 1.923914 2
## 5 1.825159 3
# Tolerance and variance inflation factor for each predictor in model_M.
olsrr::ols_vif_tol(model_M)
## Variables Tolerance VIF
## 1 MARITAL 0.8952999 1.116944
## 2 RACE 0.8318336 1.202164
## 3 BORN 0.8541626 1.170737
## 4 RELIG 0.9620493 1.039448
## 5 REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
# Analysis-of-variance table for model_M, one row per term.
stats::anova(model_M)
## Analysis of Variance Table
##
## Response: HAPPY
## Df Sum Sq Mean Sq F value Pr(>F)
## MARITAL 1 32.65 32.654 83.5434 < 2.2e-16 ***
## RACE 1 0.00 0.001 0.0036 0.9518563
## BORN 1 0.72 0.723 1.8488 0.1742436
## RELIG 1 0.06 0.062 0.1585 0.6905912
## REGION 1 0.08 0.077 0.1978 0.6566060
## FamIncomeAdj 1 5.54 5.540 14.1728 0.0001769 ***
## Residuals 961 375.62 0.391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eigenvalues, condition indices and variance proportions for model_M.
olsrr::ols_eigen_cindex(model_M)
## Eigenvalue Condition Index intercept MARITAL RACE BORN
## 1 5.49345159 1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252 2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050 4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729 4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146 5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381 7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283 13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
## RELIG REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Combined collinearity report for model_M (tolerance/VIF plus condition index).
olsrr::ols_coll_diag(model_M)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 MARITAL 0.8952999 1.116944
## 2 RACE 0.8318336 1.202164
## 3 BORN 0.8541626 1.170737
## 4 RELIG 0.9620493 1.039448
## 5 REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept MARITAL RACE BORN
## 1 5.49345159 1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252 2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050 4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729 4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146 5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381 7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283 13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
## RELIG REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Residual fit spread plot for model_M.
olsrr::ols_plot_resid_fit_spread(model_M)
# Zero-order, partial and part correlations of each predictor with HAPPY.
olsrr::ols_correlations(model_M)
## Correlations
## -----------------------------------------------
## Variable Zero Order Partial Part
## -----------------------------------------------
## MARITAL 0.281 0.245 0.241
## RACE 0.028 -0.032 -0.031
## BORN 0.016 0.044 0.042
## RELIG 0.054 0.023 0.022
## REGION 0.012 0.016 0.015
## FamIncomeAdj -0.178 -0.121 -0.116
## -----------------------------------------------
# Observed versus fitted values plot for model_M.
olsrr::ols_plot_obs_fit(model_M)
## Warning in geom_segment(data = d, aes(x = min(x), y = min(y), xend = max(x), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?
# Panel of regression diagnostic plots for model_M.
olsrr::ols_plot_diagnostics(model_M)
## Warning in geom_segment(aes(x = min(x), y = min(y), xend = max(x), yend = max(y)), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?
# Added-variable plots for model_M.
olsrr::ols_plot_added_variable(model_M)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Component-plus-residual plots for model_M.
olsrr::ols_plot_comp_plus_resid(model_M)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Linear regression of HAPPY on the six candidate predictors, Female subset.
# This model was previously fitted with `data = Male`, which made it an exact
# copy of model_M; it must use the Female rows.
# NOTE: the `##` output lines that follow in this document were captured from
# the earlier Male-based fit and must be regenerated by re-running the script.
model_F <- lm(
  formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION + FamIncomeAdj,
  data = Female
)
# Coefficient table, residual summary and fit statistics.
summary(model_F)
##
## Call:
## lm(formula = HAPPY ~ MARITAL + RACE + BORN + RELIG + REGION +
## FamIncomeAdj, data = Male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2254 -0.6536 0.0093 0.3144 1.5642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.547e+00 9.425e-02 16.419 < 2e-16 ***
## MARITAL 9.576e-02 1.221e-02 7.846 1.14e-14 ***
## RACE -3.238e-02 3.244e-02 -0.998 0.318370
## BORN 9.273e-02 6.745e-02 1.375 0.169521
## RELIG 8.629e-03 1.190e-02 0.725 0.468506
## REGION 4.215e-03 8.487e-03 0.497 0.619540
## FamIncomeAdj -4.658e-07 1.237e-07 -3.765 0.000177 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6252 on 961 degrees of freedom
## Multiple R-squared: 0.09419, Adjusted R-squared: 0.08853
## F-statistic: 16.65 on 6 and 961 DF, p-value: < 2.2e-16
# Fitted HAPPY values from the female model, next to the observed values.
# The predictions previously came from model_M on the Male rows, so this table
# merely repeated the male results; it now uses model_F and Female.
# NOTE: the `##` output lines that follow in this document were captured from
# the earlier Male-based run and must be regenerated by re-running the script.
Fk1 <- predict(model_F, newdata = Female)
# Wrap the predictions in a data frame so they can sit next to the observed
# values.
Fk2 <- data.frame(Fk1)
Fk <- cbind(Fk2, Female$HAPPY)
# First five predicted/observed pairs.
head(Fk, 5)
## Fk1 Male$HAPPY
## 1 1.660342 2
## 2 1.710367 2
## 3 1.666686 2
## 4 1.923914 2
## 5 1.825159 3
# Tolerance and variance inflation factor for each predictor in model_F.
olsrr::ols_vif_tol(model_F)
## Variables Tolerance VIF
## 1 MARITAL 0.8952999 1.116944
## 2 RACE 0.8318336 1.202164
## 3 BORN 0.8541626 1.170737
## 4 RELIG 0.9620493 1.039448
## 5 REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
# Analysis-of-variance table for model_F, one row per term.
stats::anova(model_F)
## Analysis of Variance Table
##
## Response: HAPPY
## Df Sum Sq Mean Sq F value Pr(>F)
## MARITAL 1 32.65 32.654 83.5434 < 2.2e-16 ***
## RACE 1 0.00 0.001 0.0036 0.9518563
## BORN 1 0.72 0.723 1.8488 0.1742436
## RELIG 1 0.06 0.062 0.1585 0.6905912
## REGION 1 0.08 0.077 0.1978 0.6566060
## FamIncomeAdj 1 5.54 5.540 14.1728 0.0001769 ***
## Residuals 961 375.62 0.391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eigenvalues, condition indices and variance proportions for model_F.
olsrr::ols_eigen_cindex(model_F)
## Eigenvalue Condition Index intercept MARITAL RACE BORN
## 1 5.49345159 1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252 2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050 4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729 4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146 5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381 7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283 13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
## RELIG REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Combined collinearity report for model_F (tolerance/VIF plus condition index).
olsrr::ols_coll_diag(model_F)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 MARITAL 0.8952999 1.116944
## 2 RACE 0.8318336 1.202164
## 3 BORN 0.8541626 1.170737
## 4 RELIG 0.9620493 1.039448
## 5 REGION 0.9712678 1.029582
## 6 FamIncomeAdj 0.9189858 1.088156
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept MARITAL RACE BORN
## 1 5.49345159 1.000000 1.442748e-03 0.006520353 0.004618729 0.0020171343
## 2 0.67236252 2.858386 8.513697e-05 0.043456239 0.005954995 0.0001704501
## 3 0.31015050 4.208588 2.129409e-03 0.009127467 0.053522858 0.0063604918
## 4 0.24166729 4.767753 4.240522e-05 0.779470572 0.035561761 0.0087603480
## 5 0.15620146 5.930347 4.953600e-04 0.001369820 0.395205630 0.0119018094
## 6 0.09463381 7.619026 8.443584e-02 0.027360611 0.493572366 0.2824283458
## 7 0.03153283 13.199004 9.113691e-01 0.132694938 0.011563660 0.6883614206
## RELIG REGION FamIncomeAdj
## 1 0.0082402001 0.0048352280 8.010379e-03
## 2 0.0009123167 0.0006788444 7.524963e-01
## 3 0.8293881852 0.0429431351 3.617091e-05
## 4 0.1232174055 0.0407733894 1.289882e-01
## 5 0.0002177965 0.6400432351 1.239433e-02
## 6 0.0359076309 0.1798833142 6.690413e-02
## 7 0.0021164651 0.0908428538 3.117051e-02
# Residual fit spread plot for model_F.
olsrr::ols_plot_resid_fit_spread(model_F)
# Zero-order, partial and part correlations of each predictor with HAPPY.
olsrr::ols_correlations(model_F)
## Correlations
## -----------------------------------------------
## Variable Zero Order Partial Part
## -----------------------------------------------
## MARITAL 0.281 0.245 0.241
## RACE 0.028 -0.032 -0.031
## BORN 0.016 0.044 0.042
## RELIG 0.054 0.023 0.022
## REGION 0.012 0.016 0.015
## FamIncomeAdj -0.178 -0.121 -0.116
## -----------------------------------------------
# Observed versus fitted values plot for model_F.
olsrr::ols_plot_obs_fit(model_F)
## Warning in geom_segment(data = d, aes(x = min(x), y = min(y), xend = max(x), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?
# Panel of regression diagnostic plots for model_F.
olsrr::ols_plot_diagnostics(model_F)
## Warning in geom_segment(aes(x = min(x), y = min(y), xend = max(x), yend = max(y)), : All aesthetics have length 1, but the data has 968 rows.
## ℹ Did you mean to use `annotate()`?
# Added-variable plots for model_F.
olsrr::ols_plot_added_variable(model_F)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Component-plus-residual plots for model_F.
olsrr::ols_plot_comp_plus_resid(model_F)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'