##Computer exam R
##Loading the packages
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library('yarrr')
## Warning: package 'yarrr' was built under R version 4.0.5
## Loading required package: jpeg
## Warning: package 'jpeg' was built under R version 4.0.5
## Loading required package: BayesFactor
## Warning: package 'BayesFactor' was built under R version 4.0.5
## Loading required package: coda
## Warning: package 'coda' was built under R version 4.0.5
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## Warning: package 'circlize' was built under R version 4.0.5
## ========================================
## circlize version 0.4.13
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
##
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
##
## diamonds
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.5
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
## The following object is masked from 'package:tidyr':
##
## extract
library("corrplot")
## corrplot 0.90 loaded
library("stats")
library("stargazer")
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library("car")
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library("gap")
## Warning: package 'gap' was built under R version 4.0.5
## gap version 1.2.3-1
##
## Attaching package: 'gap'
## The following object is masked from 'package:car':
##
## logit
library("olsrr")
## Warning: package 'olsrr' was built under R version 4.0.5
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
library("skedastic")
## Warning: package 'skedastic' was built under R version 4.0.5
library("leaps")
## Warning: package 'leaps' was built under R version 4.0.5
library("MASS")
## Warning: package 'MASS' was built under R version 4.0.5
##
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
##
## cement
## The following object is masked from 'package:dplyr':
##
## select
##Setting up the working directory
setwd("C:/Econometrics/CE")
## Reading the data as csv. file
satisf<-read.csv("C:/Econometrics/CE/satisfaction.csv")
head (satisf)
## SATISF GDP OECD LE EDUC MORT HEXP EDEXP UNEM TRADE
## 1 3.6 1.377808 0 64.723 5.169435 213 4.066702 2.77576 3.8 63.17296
## 2 3.7 1.216050 0 56.112 1.583342 348 5.955873 3.06664 1.7 78.37647
## 3 3.7 2.173523 0 63.451 4.451000 293 5.983109 5.63358 10.4 68.00035
## 4 3.9 2.789078 0 71.916 5.772000 260 5.690086 2.60388 7.1 113.58184
## 5 3.9 5.631302 0 58.793 6.090000 332 2.452738 6.22079 10.0 122.09243
## 6 4.0 1.654486 0 61.530 5.110000 363 7.277308 6.18120 4.3 81.27242
stat.desc(satisf)
## SATISF GDP OECD LE EDUC
## nbr.val 110.0000000 110.0000000 110.00000000 110.0000000 110.0000000
## nbr.null 0.0000000 0.0000000 74.00000000 0.0000000 0.0000000
## nbr.na 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000
## min 3.6000000 0.7823873 0.00000000 45.5610000 1.2510000
## max 7.8000000 71.4748882 1.00000000 83.5800000 12.9478252
## range 4.2000000 70.6925009 1.00000000 38.0190000 11.6968252
## sum 617.9000000 2000.7516568 36.00000000 7948.2880000 959.9153144
## median 5.5000000 13.9433323 0.00000000 74.1760000 9.2637905
## mean 5.6172727 18.1886514 0.32727273 72.2571636 8.7265029
## SE.mean 0.1036278 1.4662687 0.04494291 0.8186187 0.2786660
## CI.mean.0.95 0.2053869 2.9060969 0.08907538 1.6224757 0.5523070
## var 1.1812585 236.4938288 0.22218515 73.7150245 8.5420234
## std.dev 1.0868572 15.3783559 0.47136520 8.5857454 2.9226740
## coef.var 0.1934849 0.8454918 1.44028256 0.1188221 0.3349193
## MORT HEXP EDEXP UNEM TRADE
## nbr.val 1.100000e+02 110.0000000 110.0000000 110.0000000 1.100000e+02
## nbr.null 0.000000e+00 0.0000000 0.0000000 0.0000000 0.000000e+00
## nbr.na 0.000000e+00 0.0000000 0.0000000 0.0000000 0.000000e+00
## min 6.400000e+01 2.4527379 1.3453600 0.6000000 2.654395e+01
## max 5.830000e+02 19.4816646 12.9821800 28.6000000 3.791448e+02
## range 5.190000e+02 17.0289267 11.6368200 28.0000000 3.526008e+02
## sum 2.208700e+04 810.8150472 535.6914300 1002.4000000 1.052987e+04
## median 1.785000e+02 6.7341982 5.0181600 7.5000000 8.389420e+01
## mean 2.007909e+02 7.3710459 4.8699221 9.1127273 9.572608e+01
## SE.mean 1.067640e+01 0.2925982 0.1752819 0.5734567 4.638582e+00
## CI.mean.0.95 2.116028e+01 0.5799202 0.3474030 1.1365725 9.193518e+00
## var 1.253841e+04 9.4175098 3.3796111 36.1737815 2.366808e+03
## std.dev 1.119750e+02 3.0687962 1.8383719 6.0144644 4.864986e+01
## coef.var 5.576698e-01 0.4163312 0.3774951 0.6600071 5.082195e-01
view(satisf)
### Changing names of columns to variable names
satis <- satisf$SATISF
gdp <- satisf$GDP
oecd <- satisf$OECD
le <- satisf$LE
educ <- satisf$EDUC
mort <- satisf$MORT
hexp <- satisf$HEXP
edexp <- satisf$EDEXP
unem <- satisf$UNEM
trade <- satisf$TRADE
### correlation Analysis
cor(satisf)
## SATISF GDP OECD LE EDUC MORT
## SATISF 1.00000000 0.7363276 0.59952233 0.68144667 0.61805649 -0.58862202
## GDP 0.73632760 1.0000000 0.50243928 0.71144116 0.70405188 -0.66123877
## OECD 0.59952233 0.5024393 1.00000000 0.42407698 0.35480164 -0.39012983
## LE 0.68144667 0.7114412 0.42407698 1.00000000 0.70221420 -0.91845238
## EDUC 0.61805649 0.7040519 0.35480164 0.70221420 1.00000000 -0.52391783
## MORT -0.58862202 -0.6612388 -0.39012983 -0.91845238 -0.52391783 1.00000000
## HEXP 0.33498251 0.3004548 0.17590228 0.13053880 0.21128462 -0.05630422
## EDEXP 0.36896864 0.2300618 0.13679572 0.03523426 0.22891043 0.01864908
## UNEM -0.20947422 -0.1545970 -0.29272976 -0.16948097 0.04114524 0.25586231
## TRADE 0.08583338 0.2596846 0.03904115 0.09715033 0.20201366 -0.03088821
## HEXP EDEXP UNEM TRADE
## SATISF 0.334982515 0.36896864 -0.20947422 0.085833379
## GDP 0.300454812 0.23006177 -0.15459704 0.259684584
## OECD 0.175902276 0.13679572 -0.29272976 0.039041153
## LE 0.130538796 0.03523426 -0.16948097 0.097150331
## EDUC 0.211284617 0.22891043 0.04114524 0.202013661
## MORT -0.056304220 0.01864908 0.25586231 -0.030888214
## HEXP 1.000000000 0.25085468 0.06767495 0.004961089
## EDEXP 0.250854679 1.00000000 0.23174783 0.233257624
## UNEM 0.067674947 0.23174783 1.00000000 -0.015539530
## TRADE 0.004961089 0.23325762 -0.01553953 1.000000000
M<- cor(satisf)
corrplot(M, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

# LE correlation greater than 0.7 with GDP, Edu and Mort
# Edu correlation greater than 0.7 with GDP
# Advice: remove LE and edu
### checking relationship between each exp. variable and satisfaction
# 1. GDP
plot(x =gdp, # X coordinates
y = satis, # y-coordinates
main = 'Satisfaction versus GDP',
xlab = 'GDP per Capita (thousand dollars)', # x-axis label
ylab = 'Satisfaction (from 1-10)', # y-axis label
pch = 16, # Filled circles
col = gray(.0, .1)) # Transparent gray
grid() # Add gridlines
model1<- lm(formula = satis ~ gdp, data = satisf)
abline(model1, col = 'blue') # Add regression line to plot

# observation: strong positive linear relationship,
# may show heteroscedasticity
# cor. coef. = 0.7363276
# 2. Life expectancy
plot(x = le,
y = satis,
main = 'Satisfaction versus Life Expectancy',
xlab = 'Life Expectancy at birth (in years)',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model2 <- lm(formula = satis ~ le, data = satisf)
abline(model2, col='blue')

# observation: moderate positive relationship
# may be non linear or heteroscedastic
# cor. coeff. = 0.68144667
# 3. Govt. health exp.
plot(x = hexp,
y = satis,
main = 'Satisfaction versus Govt. Health Expenditure',
xlab = 'Total Govt. Health Expenditure (as a % of GDP)',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model3 <- lm(formula = satis ~ hexp, data = satisf)
abline(model3, col = 'blue')

# observation weak positive relationship
# cor. coeff = 0.334982515,
# 4. Govt. educ exp.
plot(x = edexp,
y = satis,
main = 'Satisfaction versus Govt. Education Expenditure',
xlab = 'Total Govt. Education Expenditure (as a % of GDP)',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model4 <- lm(formula = satis ~ edexp, data = satisf)
abline(model4, col = 'blue')

# observation: weak positive linear relationship
# cor. coeff = 0.36896864, may be affected by outlier
# 5. unemployment
plot(x = unem,
y = satis,
main = 'Satisfaction versus Unemployment',
xlab = 'Unemployment in Population 15+ (%)',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model5 <- lm(formula = satis ~ unem, data = satisf)
abline(model5, col = 'blue')

# observation = moderate negative linear relationship
# 2 outliers: 1, 2
# cor. coeff = -0.20947422, weak negative linear relationship
# 6. trade
plot(x = trade,
y = satis,
main = 'Satisfaction versus Trade',
xlab = 'Openness to Foreign Trade and Economic Integration Index',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model6 <- lm(formula = satis ~ trade, data = satisf)
abline(model6, col = 'blue')

#no clear relationship
# cor. coeff: 0.085833379, negligent correlation
#Advice: remove trade from model
#7. Education
plot(x = educ,
y = satis,
main = 'Satisfaction versus Education',
xlab = 'Mean Education in People 25+ (years)',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model7 <- lm(formula = satis ~ educ, data = satisf)
abline(model7, col = 'blue')

# moderate positive linear relationship
# might show heteroscedasticity
# Cor. coeff = 0.61805649
# 8. Mortality
plot(x = mort,
y = satis,
main = 'Satisfaction versus Mortality',
xlab = 'Mortality Rate',
ylab = 'Satisfaction (from 1-10)',
pch = 16,
col = gray(.0, .1))
grid()
model8 <- lm(formula = satis ~ mort, data = satisf)
abline(model8, col = 'blue')

# Moderate negative linear relationship
# cor. coeff = -0.58862202
### Full model will all 9 variables
fullmodel1 <- lm(formula = satis ~
gdp + oecd + le + educ + mort + hexp + edexp + unem + trade,
data = satisf)
summary(fullmodel1)
##
## Call:
## lm(formula = satis ~ gdp + oecd + le + educ + mort + hexp + edexp +
## unem + trade, data = satisf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.27285 -0.32505 0.00461 0.35310 1.17435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.223081 1.674305 -0.731 0.466792
## gdp 0.021828 0.006542 3.337 0.001191 **
## oecd 0.483177 0.141813 3.407 0.000947 ***
## le 0.072181 0.021738 3.321 0.001255 **
## educ 0.003371 0.033672 0.100 0.920446
## mort 0.002505 0.001470 1.705 0.091359 .
## hexp 0.029349 0.019660 1.493 0.138633
## edexp 0.167158 0.033758 4.952 2.99e-06 ***
## unem -0.025871 0.010389 -2.490 0.014419 *
## trade -0.002690 0.001229 -2.189 0.030905 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5707 on 100 degrees of freedom
## Multiple R-squared: 0.747, Adjusted R-squared: 0.7243
## F-statistic: 32.81 on 9 and 100 DF, p-value: < 2.2e-16
# oecd and edexp significant at 1%,
# GDP and LE at 5%,
# unem and trade at 10%,
# mort, hexp and educ not significant
# Globally significant at 1%,
# R2 = 0.747, adj.r2 = 0.7243
### Trying to remove LE and edu to remove multicollinearity, trade cause no correlation
fullmodel2 <- lm(formula = satis ~
gdp + oecd + mort + hexp + edexp + unem,
data = satisf)
summary(fullmodel2)
##
## Call:
## lm(formula = satis ~ gdp + oecd + mort + hexp + edexp + unem,
## data = satisf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15759 -0.38244 -0.05127 0.40205 1.34668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5612284 0.2583067 17.658 < 2e-16 ***
## gdp 0.0245446 0.0059388 4.133 7.31e-05 ***
## oecd 0.5828003 0.1510903 3.857 0.00020 ***
## mort -0.0022926 0.0007422 -3.089 0.00258 **
## hexp 0.0411066 0.0209105 1.966 0.05201 .
## edexp 0.1477892 0.0355426 4.158 6.65e-05 ***
## unem -0.0157480 0.0108620 -1.450 0.15015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6168 on 103 degrees of freedom
## Multiple R-squared: 0.6957, Adjusted R-squared: 0.6779
## F-statistic: 39.24 on 6 and 103 DF, p-value: < 2.2e-16
# increased indiv, significance for intercept, gdp, mort
# unem and hexp not significant
# reduced explanatory power, adj.r2 = 67.79%
# may not be best
### Manual stepwise regression to have 6 exp. variables
#1. Removing the 2 least individually significant variables (mort, edu)
# as well as trade cause no significance
fullmodel3 <- lm(formula = satis ~
gdp + oecd + le + edexp + unem + hexp,
data = satisf)
summary(fullmodel3)
##
## Call:
## lm(formula = satis ~ gdp + oecd + le + edexp + unem + hexp, data = satisf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2130 -0.3668 -0.0075 0.3328 1.2741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.933353 0.682542 1.367 0.174456
## gdp 0.017695 0.005801 3.050 0.002906 **
## oecd 0.534449 0.143318 3.729 0.000315 ***
## le 0.045859 0.009521 4.816 5.04e-06 ***
## edexp 0.156168 0.033553 4.654 9.71e-06 ***
## unem -0.019945 0.010183 -1.959 0.052855 .
## hexp 0.039985 0.019595 2.041 0.043849 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5825 on 103 degrees of freedom
## Multiple R-squared: 0.7286, Adjusted R-squared: 0.7128
## F-statistic: 46.08 on 6 and 103 DF, p-value: < 2.2e-16
# le, hexp and GDP increased individual significance
# unem no significance
# reduced exp. power 0.7128 adj.r2
#2. Removing 2 of the least significant, as well as LE causing multicollinearity
fullmodel4 <- lm(formula = satis ~
gdp + oecd + mort + edexp + unem,
data = satisf)
summary(fullmodel4)
##
## Call:
## lm(formula = satis ~ gdp + oecd + mort + edexp + unem, data = satisf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19056 -0.34226 -0.03269 0.38952 1.39722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6990514 0.2520108 18.646 < 2e-16 ***
## gdp 0.0276611 0.0058016 4.768 6.08e-06 ***
## oecd 0.5996037 0.1529117 3.921 0.000158 ***
## mort -0.0020661 0.0007433 -2.780 0.006458 **
## edexp 0.1571781 0.0357020 4.403 2.60e-05 ***
## unem -0.0144554 0.0109904 -1.315 0.191311
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6252 on 104 degrees of freedom
## Multiple R-squared: 0.6842, Adjusted R-squared: 0.6691
## F-statistic: 45.07 on 5 and 104 DF, p-value: < 2.2e-16
#increased individual significance to 1%, and mort to 10%
#Reduced explanatory power, adj.r2 = 0.6691
###Stepwise regression using r packages to get 6 exp. variables
fullmodel5test = regsubsets(satis ~ gdp + oecd + le + educ + mort + hexp + edexp
+ unem,
data = satisf)
summary(fullmodel5test)
## Subset selection object
## Call: regsubsets.formula(satis ~ gdp + oecd + le + educ + mort + hexp +
## edexp + unem, data = satisf)
## 8 Variables (and intercept)
## Forced in Forced out
## gdp FALSE FALSE
## oecd FALSE FALSE
## le FALSE FALSE
## educ FALSE FALSE
## mort FALSE FALSE
## hexp FALSE FALSE
## edexp FALSE FALSE
## unem FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## gdp oecd le educ mort hexp edexp unem
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" "*" " " " " " " " " " " " "
## 3 ( 1 ) " " "*" "*" " " " " " " "*" " "
## 4 ( 1 ) "*" "*" "*" " " " " " " "*" " "
## 5 ( 1 ) "*" "*" "*" " " " " "*" "*" " "
## 6 ( 1 ) "*" "*" "*" " " " " "*" "*" "*"
## 7 ( 1 ) "*" "*" "*" " " "*" "*" "*" "*"
## 8 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
names(summary(fullmodel5test))
## [1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
summary(fullmodel5test)$adjr2
## [1] 0.5379392 0.6054336 0.6637488 0.6980031 0.7049505 0.7127838 0.7166564
## [8] 0.7138959
fullmodel5 <- lm(formula = satis ~ gdp + oecd + le + edexp + unem + hexp,
data = satisf)
summary(fullmodel5)
##
## Call:
## lm(formula = satis ~ gdp + oecd + le + edexp + unem + hexp, data = satisf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2130 -0.3668 -0.0075 0.3328 1.2741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.933353 0.682542 1.367 0.174456
## gdp 0.017695 0.005801 3.050 0.002906 **
## oecd 0.534449 0.143318 3.729 0.000315 ***
## le 0.045859 0.009521 4.816 5.04e-06 ***
## edexp 0.156168 0.033553 4.654 9.71e-06 ***
## unem -0.019945 0.010183 -1.959 0.052855 .
## hexp 0.039985 0.019595 2.041 0.043849 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5825 on 103 degrees of freedom
## Multiple R-squared: 0.7286, Adjusted R-squared: 0.7128
## F-statistic: 46.08 on 6 and 103 DF, p-value: < 2.2e-16
#this method suggests removing educ and mort
#reduced exp. power 0.7128 adj.r2
stargazer(fullmodel1, fullmodel2, fullmodel3, fullmodel4, fullmodel5,
type="html", out = "fullmodels.doc")
##
## <table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">satis</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">gdp</td><td>0.022<sup>***</sup></td><td>0.025<sup>***</sup></td><td>0.018<sup>***</sup></td><td>0.028<sup>***</sup></td><td>0.018<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.006)</td><td>(0.006)</td><td>(0.006)</td><td>(0.006)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">oecd</td><td>0.483<sup>***</sup></td><td>0.583<sup>***</sup></td><td>0.534<sup>***</sup></td><td>0.600<sup>***</sup></td><td>0.534<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.142)</td><td>(0.151)</td><td>(0.143)</td><td>(0.153)</td><td>(0.143)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">le</td><td>0.072<sup>***</sup></td><td></td><td>0.046<sup>***</sup></td><td></td><td>0.046<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.022)</td><td></td><td>(0.010)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">educ</td><td>0.003</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.034)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">mort</td><td>0.003<sup>*</sup></td><td>-0.002<sup>***</sup></td><td></td><td>-0.002<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.001)</td><td></td><td>(0.001)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">hexp</td><td>0.029</td><td>0.041<sup>*</sup></td><td>0.040<sup>**</sup></td><td></td><td>0.040<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.020)</td><td>(0.021)</td><td>(0.020)</td><td></td><td>(0.020)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">edexp</td><td>0.167<sup>***</sup></td><td>0.148<sup>***</sup></td><td>0.156<sup>***</sup></td><td>0.157<sup>***</sup></td><td>0.156<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.034)</td><td>(0.036)</td><td>(0.034)</td><td>(0.036)</td><td>(0.034)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">unem</td><td>-0.026<sup>**</sup></td><td>-0.016</td><td>-0.020<sup>*</sup></td><td>-0.014</td><td>-0.020<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.010)</td><td>(0.011)</td><td>(0.010)</td><td>(0.011)</td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">trade</td><td>-0.003<sup>**</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>-1.223</td><td>4.561<sup>***</sup></td><td>0.933</td><td>4.699<sup>***</sup></td><td>0.933</td></tr>
## <tr><td style="text-align:left"></td><td>(1.674)</td><td>(0.258)</td><td>(0.683)</td><td>(0.252)</td><td>(0.683)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>110</td><td>110</td><td>110</td><td>110</td><td>110</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.747</td><td>0.696</td><td>0.729</td><td>0.684</td><td>0.729</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.724</td><td>0.678</td><td>0.713</td><td>0.669</td><td>0.713</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>0.571 (df = 100)</td><td>0.617 (df = 103)</td><td>0.582 (df = 103)</td><td>0.625 (df = 104)</td><td>0.582 (df = 103)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>32.810<sup>***</sup> (df = 9; 100)</td><td>39.239<sup>***</sup> (df = 6; 103)</td><td>46.084<sup>***</sup> (df = 6; 103)</td><td>45.072<sup>***</sup> (df = 5; 104)</td><td>46.084<sup>***</sup> (df = 6; 103)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
## we can see from all of these methods, the best models are models are
## 3 and 5 which drop trade, educ and mort but maintain high exp. power
## model 4 has less exp power but the avg. std. error of coeff is lower
## Avg. std error of model 3/5= 0.12611, model 4=0.06599
### Added Variable plots
avPlots(fullmodel3)

## checking for linearity using residual analysis
satisf$predicted <- predict(fullmodel3)
satisf$residuals <- residuals(fullmodel3)
satisf$fitted <- fitted(fullmodel3)
view(satisf)
predicted <- satisf$predicted
residuals <- satisf$residuals
fitted <- satisf$fitted
mean(residuals)
## [1] -7.747117e-19
plot(x = predicted,
y = satis,
main = 'Predicted v. Actual Values',
xlab = 'Predicted Satisfaction Values',
ylab = 'Actual Satisfaction Values',
pch = 16,
col = gray(.0, .1))
grid()
abline(lm(formula = satis ~ predicted, data = satisf), col="blue")

plot(x = fitted,
y = residuals,
main = 'Residual Analysis',
xlab = 'Fitted values',
ylab = 'Residuals',
pch = 16,
col = gray(.0, .1))
grid()
abline(h=0, col="blue")

# mean of residuals is -7.747117e-19, very close to 0,
# there is no recognizable trend,
# therefore linearitysatisfied
### Checking for heteroscedasticity
residualssqrd <- residuals(fullmodel3)**2
plot(x = predicted,
y = residualssqrd,
main = 'Squared Residuals vs.Fitted Values',
xlab = 'Fitted values',
ylab = 'Squared Residuals',
pch = 16,
col = gray(.0, .1))

#slight curvature, proceed with further tests to verify
### White test for heteroscedasticity
white_lm(fullmodel3, interactions = TRUE)
## # A tibble: 1 x 5
## statistic p.value parameter method alternative
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 25.2 0.564 27 White's Test greater
# P value (0.564)> any significance level,
# fail to reject the null hypothesis
# Full Model no. 3 is homoscedastic.
### Checking for outliers
ols_plot_resid_stand(fullmodel3)

#7 outliers: 11, 17, 35, 36, 82, 90, 96
### Test for stability (do we need a structural break?)
#1st graphical inspection
table(oecd)
## oecd
## 0 1
## 74 36
mean(oecd)
## [1] 0.3272727
oecdplot <- ggplot(data = satisf, aes(x = gdp, y = satis, colour=factor(oecd)))
oecdplot + stat_smooth(method = lm, fullrange = FALSE) +
labs(title = 'Satisfaction versus GDP',
x = 'GDP',
y = 'Satisfaction') +
scale_colour_discrete(name ="OECD Status",
breaks = c("0", "1"),
labels=c("Non-OECD", "OECD"))
## `geom_smooth()` using formula 'y ~ x'

#2nd, chow test
oecdyes <- filter(satisf, oecd == 1)
oecdno <- filter(satisf, oecd == 0)
y1 <- matrix(c(oecdyes[, "SATISF"]), byrow=TRUE, ncol=1)
y2 <- matrix(c(oecdno[, "SATISF"]), byrow=TRUE, ncol=1)
x1 <- matrix(c(oecdyes[, "GDP"]), byrow=TRUE, ncol=1)
x2 <- matrix(c(oecdno[, "GDP"]), byrow=TRUE, ncol=1)
chow.test(y1, x1, y2, x2)
## F value d.f.1 d.f.2 P value
## 1.080144e+01 2.000000e+00 1.060000e+02 5.377815e-05
# P value 5.377815e-05 significant at 1%
# reject null hypothesis
# a break between OECD and non OECD is significant