##Computer exam R


##Loading the packages 

library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library('yarrr')
## Warning: package 'yarrr' was built under R version 4.0.5
## Loading required package: jpeg
## Warning: package 'jpeg' was built under R version 4.0.5
## Loading required package: BayesFactor
## Warning: package 'BayesFactor' was built under R version 4.0.5
## Loading required package: coda
## Warning: package 'coda' was built under R version 4.0.5
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## Warning: package 'circlize' was built under R version 4.0.5
## ========================================
## circlize version 0.4.13
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
## 
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.5
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## The following object is masked from 'package:tidyr':
## 
##     extract
library("corrplot")
## corrplot 0.90 loaded
library("stats")

library("stargazer")
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library("car")
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library("gap")
## Warning: package 'gap' was built under R version 4.0.5
## gap version 1.2.3-1
## 
## Attaching package: 'gap'
## The following object is masked from 'package:car':
## 
##     logit
library("olsrr")
## Warning: package 'olsrr' was built under R version 4.0.5
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library("skedastic")
## Warning: package 'skedastic' was built under R version 4.0.5
library("leaps")
## Warning: package 'leaps' was built under R version 4.0.5
library("MASS")
## Warning: package 'MASS' was built under R version 4.0.5
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
## 
##     cement
## The following object is masked from 'package:dplyr':
## 
##     select
##Setting up the working directory 

setwd("C:/Econometrics/CE")




## Reading the data as csv. file 

satisf<-read.csv("C:/Econometrics/CE/satisfaction.csv")

head (satisf)
##   SATISF      GDP OECD     LE     EDUC MORT     HEXP   EDEXP UNEM     TRADE
## 1    3.6 1.377808    0 64.723 5.169435  213 4.066702 2.77576  3.8  63.17296
## 2    3.7 1.216050    0 56.112 1.583342  348 5.955873 3.06664  1.7  78.37647
## 3    3.7 2.173523    0 63.451 4.451000  293 5.983109 5.63358 10.4  68.00035
## 4    3.9 2.789078    0 71.916 5.772000  260 5.690086 2.60388  7.1 113.58184
## 5    3.9 5.631302    0 58.793 6.090000  332 2.452738 6.22079 10.0 122.09243
## 6    4.0 1.654486    0 61.530 5.110000  363 7.277308 6.18120  4.3  81.27242
stat.desc(satisf)
##                   SATISF          GDP         OECD           LE        EDUC
## nbr.val      110.0000000  110.0000000 110.00000000  110.0000000 110.0000000
## nbr.null       0.0000000    0.0000000  74.00000000    0.0000000   0.0000000
## nbr.na         0.0000000    0.0000000   0.00000000    0.0000000   0.0000000
## min            3.6000000    0.7823873   0.00000000   45.5610000   1.2510000
## max            7.8000000   71.4748882   1.00000000   83.5800000  12.9478252
## range          4.2000000   70.6925009   1.00000000   38.0190000  11.6968252
## sum          617.9000000 2000.7516568  36.00000000 7948.2880000 959.9153144
## median         5.5000000   13.9433323   0.00000000   74.1760000   9.2637905
## mean           5.6172727   18.1886514   0.32727273   72.2571636   8.7265029
## SE.mean        0.1036278    1.4662687   0.04494291    0.8186187   0.2786660
## CI.mean.0.95   0.2053869    2.9060969   0.08907538    1.6224757   0.5523070
## var            1.1812585  236.4938288   0.22218515   73.7150245   8.5420234
## std.dev        1.0868572   15.3783559   0.47136520    8.5857454   2.9226740
## coef.var       0.1934849    0.8454918   1.44028256    0.1188221   0.3349193
##                      MORT        HEXP       EDEXP         UNEM        TRADE
## nbr.val      1.100000e+02 110.0000000 110.0000000  110.0000000 1.100000e+02
## nbr.null     0.000000e+00   0.0000000   0.0000000    0.0000000 0.000000e+00
## nbr.na       0.000000e+00   0.0000000   0.0000000    0.0000000 0.000000e+00
## min          6.400000e+01   2.4527379   1.3453600    0.6000000 2.654395e+01
## max          5.830000e+02  19.4816646  12.9821800   28.6000000 3.791448e+02
## range        5.190000e+02  17.0289267  11.6368200   28.0000000 3.526008e+02
## sum          2.208700e+04 810.8150472 535.6914300 1002.4000000 1.052987e+04
## median       1.785000e+02   6.7341982   5.0181600    7.5000000 8.389420e+01
## mean         2.007909e+02   7.3710459   4.8699221    9.1127273 9.572608e+01
## SE.mean      1.067640e+01   0.2925982   0.1752819    0.5734567 4.638582e+00
## CI.mean.0.95 2.116028e+01   0.5799202   0.3474030    1.1365725 9.193518e+00
## var          1.253841e+04   9.4175098   3.3796111   36.1737815 2.366808e+03
## std.dev      1.119750e+02   3.0687962   1.8383719    6.0144644 4.864986e+01
## coef.var     5.576698e-01   0.4163312   0.3774951    0.6600071 5.082195e-01
view(satisf)



### Changing names of columns to variable names 

satis <- satisf$SATISF

gdp <- satisf$GDP

oecd <- satisf$OECD

le <- satisf$LE

educ <- satisf$EDUC

mort <- satisf$MORT

hexp <- satisf$HEXP

edexp <- satisf$EDEXP

unem <- satisf$UNEM

trade <- satisf$TRADE



### correlation Analysis 

cor(satisf)
##             SATISF        GDP        OECD          LE        EDUC        MORT
## SATISF  1.00000000  0.7363276  0.59952233  0.68144667  0.61805649 -0.58862202
## GDP     0.73632760  1.0000000  0.50243928  0.71144116  0.70405188 -0.66123877
## OECD    0.59952233  0.5024393  1.00000000  0.42407698  0.35480164 -0.39012983
## LE      0.68144667  0.7114412  0.42407698  1.00000000  0.70221420 -0.91845238
## EDUC    0.61805649  0.7040519  0.35480164  0.70221420  1.00000000 -0.52391783
## MORT   -0.58862202 -0.6612388 -0.39012983 -0.91845238 -0.52391783  1.00000000
## HEXP    0.33498251  0.3004548  0.17590228  0.13053880  0.21128462 -0.05630422
## EDEXP   0.36896864  0.2300618  0.13679572  0.03523426  0.22891043  0.01864908
## UNEM   -0.20947422 -0.1545970 -0.29272976 -0.16948097  0.04114524  0.25586231
## TRADE   0.08583338  0.2596846  0.03904115  0.09715033  0.20201366 -0.03088821
##                HEXP      EDEXP        UNEM        TRADE
## SATISF  0.334982515 0.36896864 -0.20947422  0.085833379
## GDP     0.300454812 0.23006177 -0.15459704  0.259684584
## OECD    0.175902276 0.13679572 -0.29272976  0.039041153
## LE      0.130538796 0.03523426 -0.16948097  0.097150331
## EDUC    0.211284617 0.22891043  0.04114524  0.202013661
## MORT   -0.056304220 0.01864908  0.25586231 -0.030888214
## HEXP    1.000000000 0.25085468  0.06767495  0.004961089
## EDEXP   0.250854679 1.00000000  0.23174783  0.233257624
## UNEM    0.067674947 0.23174783  1.00000000 -0.015539530
## TRADE   0.004961089 0.23325762 -0.01553953  1.000000000
M<- cor(satisf)

corrplot(M, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)       

# LE correlation greater than 0.7 with GDP, Edu and Mort
# Edu correlation greater than 0.7 with GDP
# Advice: remove LE and edu


### checking relationship between each exp. variable and satisfaction

# 1. GDP

plot(x =gdp,        # X coordinates
     y = satis,        # y-coordinates
     main = 'Satisfaction versus GDP',
     xlab = 'GDP per Capita (thousand dollars)',   # x-axis label
     ylab = 'Satisfaction (from 1-10)',   # y-axis label
     pch = 16,                  # Filled circles
     col = gray(.0, .1))        # Transparent gray

grid()        # Add gridlines

model1<- lm(formula = satis ~ gdp, data = satisf)

abline(model1, col = 'blue')      # Add regression line to plot

# observation: strong positive linear relationship, 
# may show heteroscedasticity 
# cor. coef. = 0.7363276


# 2. Life expectancy

plot(x = le,     
     y = satis,       
     main = 'Satisfaction versus Life Expectancy',
     xlab = 'Life Expectancy at birth (in years)', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model2 <- lm(formula = satis ~ le, data = satisf)

abline(model2, col='blue')

# observation: moderate positive relationship 
# may be non linear or heteroscedastic 
# cor. coeff. = 0.68144667

# 3. Govt. health exp.  

plot(x = hexp,     
     y = satis,       
     main = 'Satisfaction versus Govt. Health Expenditure',
     xlab = 'Total Govt. Health Expenditure (as a % of GDP)', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model3 <- lm(formula = satis ~ hexp, data = satisf)

abline(model3, col = 'blue')

# observation weak positive relationship 
# cor. coeff = 0.334982515, 

# 4. Govt. educ exp. 

plot(x = edexp,     
     y = satis,       
     main = 'Satisfaction versus Govt. Education Expenditure',
     xlab = 'Total Govt. Education Expenditure (as a % of GDP)', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model4 <- lm(formula = satis ~ edexp, data = satisf)

abline(model4, col = 'blue')

# observation: weak positive linear relationship 
# cor. coeff = 0.36896864, may be affected by outlier 

# 5. unemployment 

plot(x = unem,     
     y = satis,       
     main = 'Satisfaction versus Unemployment',
     xlab = 'Unemployment in Population 15+ (%)', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model5 <- lm(formula = satis ~ unem, data = satisf)

abline(model5, col = 'blue')

# observation = moderate negative linear relationship 
# 2 outliers: 1, 2
# cor. coeff = -0.20947422, weak negative linear relationship

# 6. trade 

plot(x = trade,     
     y = satis,       
     main = 'Satisfaction versus Trade',
     xlab = 'Openness to Foreign Trade and Economic Integration Index', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model6 <- lm(formula = satis ~ trade, data = satisf)

abline(model6, col = 'blue')

#no clear relationship
# cor. coeff: 0.085833379, negligent correlation
#Advice: remove trade from model

#7. Education 

plot(x = educ,     
     y = satis,       
     main = 'Satisfaction versus Education',
     xlab = 'Mean Education in People 25+ (years)', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model7 <- lm(formula = satis ~ educ, data = satisf)

abline(model7, col = 'blue')

# moderate positive linear relationship 
# might show heteroscedasticity
# Cor. coeff = 0.61805649

# 8. Mortality 

plot(x = mort,     
     y = satis,       
     main = 'Satisfaction versus Mortality',
     xlab = 'Mortality Rate', 
     ylab = 'Satisfaction (from 1-10)',   
     pch = 16,                
     col = gray(.0, .1))        
grid()        

model8 <- lm(formula = satis ~ mort, data = satisf)

abline(model8, col = 'blue')

# Moderate negative linear relationship 
# cor. coeff = -0.58862202



### Full model will all 9 variables   

fullmodel1 <- lm(formula = satis ~ 
                        gdp + oecd + le + educ + mort + hexp + edexp + unem + trade,
                data = satisf)

summary(fullmodel1)      
## 
## Call:
## lm(formula = satis ~ gdp + oecd + le + educ + mort + hexp + edexp + 
##     unem + trade, data = satisf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.27285 -0.32505  0.00461  0.35310  1.17435 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.223081   1.674305  -0.731 0.466792    
## gdp          0.021828   0.006542   3.337 0.001191 ** 
## oecd         0.483177   0.141813   3.407 0.000947 ***
## le           0.072181   0.021738   3.321 0.001255 ** 
## educ         0.003371   0.033672   0.100 0.920446    
## mort         0.002505   0.001470   1.705 0.091359 .  
## hexp         0.029349   0.019660   1.493 0.138633    
## edexp        0.167158   0.033758   4.952 2.99e-06 ***
## unem        -0.025871   0.010389  -2.490 0.014419 *  
## trade       -0.002690   0.001229  -2.189 0.030905 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5707 on 100 degrees of freedom
## Multiple R-squared:  0.747,  Adjusted R-squared:  0.7243 
## F-statistic: 32.81 on 9 and 100 DF,  p-value: < 2.2e-16
        # oecd and edexp significant at 1%, 
        # GDP and LE at 5%,
        # unem and trade at 10%, 
        # mort, hexp and educ not significant
        # Globally significant at 1%, 
        # R2 = 0.747, adj.r2 = 0.7243



### Trying to remove LE and edu to remove multicollinearity, trade cause no correlation

fullmodel2 <- lm(formula = satis ~ 
                        gdp + oecd + mort + hexp + edexp + unem,
                data = satisf)

summary(fullmodel2) 
## 
## Call:
## lm(formula = satis ~ gdp + oecd + mort + hexp + edexp + unem, 
##     data = satisf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15759 -0.38244 -0.05127  0.40205  1.34668 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.5612284  0.2583067  17.658  < 2e-16 ***
## gdp          0.0245446  0.0059388   4.133 7.31e-05 ***
## oecd         0.5828003  0.1510903   3.857  0.00020 ***
## mort        -0.0022926  0.0007422  -3.089  0.00258 ** 
## hexp         0.0411066  0.0209105   1.966  0.05201 .  
## edexp        0.1477892  0.0355426   4.158 6.65e-05 ***
## unem        -0.0157480  0.0108620  -1.450  0.15015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6168 on 103 degrees of freedom
## Multiple R-squared:  0.6957, Adjusted R-squared:  0.6779 
## F-statistic: 39.24 on 6 and 103 DF,  p-value: < 2.2e-16
        # increased indiv, significance for intercept, gdp, mort  
        # unem and hexp not significant
        # reduced explanatory power, adj.r2 = 67.79%
        # may not be best



### Manual stepwise regression to have 6 exp. variables

#1. Removing the 2 least individually significant variables (mort, edu)
# as well as trade cause no significance

fullmodel3 <- lm(formula = satis ~ 
                         gdp + oecd + le + edexp + unem + hexp,
                 data = satisf)

summary(fullmodel3)
## 
## Call:
## lm(formula = satis ~ gdp + oecd + le + edexp + unem + hexp, data = satisf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2130 -0.3668 -0.0075  0.3328  1.2741 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.933353   0.682542   1.367 0.174456    
## gdp          0.017695   0.005801   3.050 0.002906 ** 
## oecd         0.534449   0.143318   3.729 0.000315 ***
## le           0.045859   0.009521   4.816 5.04e-06 ***
## edexp        0.156168   0.033553   4.654 9.71e-06 ***
## unem        -0.019945   0.010183  -1.959 0.052855 .  
## hexp         0.039985   0.019595   2.041 0.043849 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5825 on 103 degrees of freedom
## Multiple R-squared:  0.7286, Adjusted R-squared:  0.7128 
## F-statistic: 46.08 on 6 and 103 DF,  p-value: < 2.2e-16
        # le, hexp and GDP increased individual significance 
        # unem no significance
        # reduced exp. power 0.7128 adj.r2 


#2. Removing 2 of the least significant, as well as LE causing multicollinearity

fullmodel4 <- lm(formula = satis ~ 
                         gdp + oecd + mort + edexp + unem,
                 data = satisf)

summary(fullmodel4)
## 
## Call:
## lm(formula = satis ~ gdp + oecd + mort + edexp + unem, data = satisf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19056 -0.34226 -0.03269  0.38952  1.39722 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.6990514  0.2520108  18.646  < 2e-16 ***
## gdp          0.0276611  0.0058016   4.768 6.08e-06 ***
## oecd         0.5996037  0.1529117   3.921 0.000158 ***
## mort        -0.0020661  0.0007433  -2.780 0.006458 ** 
## edexp        0.1571781  0.0357020   4.403 2.60e-05 ***
## unem        -0.0144554  0.0109904  -1.315 0.191311    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6252 on 104 degrees of freedom
## Multiple R-squared:  0.6842, Adjusted R-squared:  0.6691 
## F-statistic: 45.07 on 5 and 104 DF,  p-value: < 2.2e-16
        #increased individual significance to 1%, and mort to 10%
        #Reduced explanatory power, adj.r2 = 0.6691



###Stepwise regression using r packages to get 6 exp. variables

fullmodel5test = regsubsets(satis ~ gdp + oecd + le + educ + mort + hexp + edexp 
                                 + unem,
                         data = satisf)

summary(fullmodel5test)
## Subset selection object
## Call: regsubsets.formula(satis ~ gdp + oecd + le + educ + mort + hexp + 
##     edexp + unem, data = satisf)
## 8 Variables  (and intercept)
##       Forced in Forced out
## gdp       FALSE      FALSE
## oecd      FALSE      FALSE
## le        FALSE      FALSE
## educ      FALSE      FALSE
## mort      FALSE      FALSE
## hexp      FALSE      FALSE
## edexp     FALSE      FALSE
## unem      FALSE      FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
##          gdp oecd le  educ mort hexp edexp unem
## 1  ( 1 ) "*" " "  " " " "  " "  " "  " "   " " 
## 2  ( 1 ) "*" "*"  " " " "  " "  " "  " "   " " 
## 3  ( 1 ) " " "*"  "*" " "  " "  " "  "*"   " " 
## 4  ( 1 ) "*" "*"  "*" " "  " "  " "  "*"   " " 
## 5  ( 1 ) "*" "*"  "*" " "  " "  "*"  "*"   " " 
## 6  ( 1 ) "*" "*"  "*" " "  " "  "*"  "*"   "*" 
## 7  ( 1 ) "*" "*"  "*" " "  "*"  "*"  "*"   "*" 
## 8  ( 1 ) "*" "*"  "*" "*"  "*"  "*"  "*"   "*"
names(summary(fullmodel5test))
## [1] "which"  "rsq"    "rss"    "adjr2"  "cp"     "bic"    "outmat" "obj"
summary(fullmodel5test)$adjr2
## [1] 0.5379392 0.6054336 0.6637488 0.6980031 0.7049505 0.7127838 0.7166564
## [8] 0.7138959
fullmodel5 <- lm(formula = satis ~ gdp + oecd + le  + edexp + unem + hexp,
                   data = satisf)

summary(fullmodel5)
## 
## Call:
## lm(formula = satis ~ gdp + oecd + le + edexp + unem + hexp, data = satisf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2130 -0.3668 -0.0075  0.3328  1.2741 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.933353   0.682542   1.367 0.174456    
## gdp          0.017695   0.005801   3.050 0.002906 ** 
## oecd         0.534449   0.143318   3.729 0.000315 ***
## le           0.045859   0.009521   4.816 5.04e-06 ***
## edexp        0.156168   0.033553   4.654 9.71e-06 ***
## unem        -0.019945   0.010183  -1.959 0.052855 .  
## hexp         0.039985   0.019595   2.041 0.043849 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5825 on 103 degrees of freedom
## Multiple R-squared:  0.7286, Adjusted R-squared:  0.7128 
## F-statistic: 46.08 on 6 and 103 DF,  p-value: < 2.2e-16
        #this method suggests removing educ and mort
        #reduced exp. power 0.7128 adj.r2

stargazer(fullmodel1, fullmodel2, fullmodel3, fullmodel4, fullmodel5, 
          type="html", out = "fullmodels.doc")
## 
## <table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">satis</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">gdp</td><td>0.022<sup>***</sup></td><td>0.025<sup>***</sup></td><td>0.018<sup>***</sup></td><td>0.028<sup>***</sup></td><td>0.018<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.006)</td><td>(0.006)</td><td>(0.006)</td><td>(0.006)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">oecd</td><td>0.483<sup>***</sup></td><td>0.583<sup>***</sup></td><td>0.534<sup>***</sup></td><td>0.600<sup>***</sup></td><td>0.534<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.142)</td><td>(0.151)</td><td>(0.143)</td><td>(0.153)</td><td>(0.143)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">le</td><td>0.072<sup>***</sup></td><td></td><td>0.046<sup>***</sup></td><td></td><td>0.046<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.022)</td><td></td><td>(0.010)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">educ</td><td>0.003</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.034)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">mort</td><td>0.003<sup>*</sup></td><td>-0.002<sup>***</sup></td><td></td><td>-0.002<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.001)</td><td></td><td>(0.001)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">hexp</td><td>0.029</td><td>0.041<sup>*</sup></td><td>0.040<sup>**</sup></td><td></td><td>0.040<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.020)</td><td>(0.021)</td><td>(0.020)</td><td></td><td>(0.020)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">edexp</td><td>0.167<sup>***</sup></td><td>0.148<sup>***</sup></td><td>0.156<sup>***</sup></td><td>0.157<sup>***</sup></td><td>0.156<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.034)</td><td>(0.036)</td><td>(0.034)</td><td>(0.036)</td><td>(0.034)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">unem</td><td>-0.026<sup>**</sup></td><td>-0.016</td><td>-0.020<sup>*</sup></td><td>-0.014</td><td>-0.020<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.010)</td><td>(0.011)</td><td>(0.010)</td><td>(0.011)</td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">trade</td><td>-0.003<sup>**</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>-1.223</td><td>4.561<sup>***</sup></td><td>0.933</td><td>4.699<sup>***</sup></td><td>0.933</td></tr>
## <tr><td style="text-align:left"></td><td>(1.674)</td><td>(0.258)</td><td>(0.683)</td><td>(0.252)</td><td>(0.683)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>110</td><td>110</td><td>110</td><td>110</td><td>110</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.747</td><td>0.696</td><td>0.729</td><td>0.684</td><td>0.729</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.724</td><td>0.678</td><td>0.713</td><td>0.669</td><td>0.713</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>0.571 (df = 100)</td><td>0.617 (df = 103)</td><td>0.582 (df = 103)</td><td>0.625 (df = 104)</td><td>0.582 (df = 103)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>32.810<sup>***</sup> (df = 9; 100)</td><td>39.239<sup>***</sup> (df = 6; 103)</td><td>46.084<sup>***</sup> (df = 6; 103)</td><td>45.072<sup>***</sup> (df = 5; 104)</td><td>46.084<sup>***</sup> (df = 6; 103)</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
## we can see from all of these methods, the best models are models are
## 3 and 5 which drop trade, educ and mort but maintain high exp. power
## model 4 has less exp power but the avg. std. error of coeff is lower
## Avg. std error of model 3/5= 0.12611, model 4=0.06599



### Added Variable plots 

avPlots(fullmodel3)

## checking for linearity using residual analysis 

satisf$predicted <- predict(fullmodel3)
satisf$residuals <- residuals(fullmodel3)
satisf$fitted <- fitted(fullmodel3)

view(satisf)

predicted <- satisf$predicted
residuals <- satisf$residuals
fitted <- satisf$fitted

mean(residuals)
## [1] -7.747117e-19
plot(x = predicted,     
     y = satis,       
     main = 'Predicted v. Actual Values',
     xlab = 'Predicted Satisfaction Values', 
     ylab = 'Actual Satisfaction Values',   
     pch = 16,                
     col = gray(.0, .1))

grid()        

abline(lm(formula = satis ~ predicted, data = satisf), col="blue")

plot(x = fitted,     
     y = residuals,       
     main = 'Residual Analysis',
     xlab = 'Fitted values', 
     ylab = 'Residuals',   
     pch = 16,                
     col = gray(.0, .1))        

grid()        

abline(h=0, col="blue")

# mean of residuals is -7.747117e-19, very close to 0,
# there is no recognizable trend, 
# therefore linearitysatisfied


### Checking for heteroscedasticity 

residualssqrd <- residuals(fullmodel3)**2

plot(x = predicted, 
     y = residualssqrd,
     main = 'Squared Residuals vs.Fitted Values',
     xlab = 'Fitted values', 
     ylab = 'Squared Residuals',   
     pch = 16,                
     col = gray(.0, .1))

#slight curvature, proceed with further tests to verify



### White test for heteroscedasticity

white_lm(fullmodel3, interactions = TRUE)
## # A tibble: 1 x 5
##   statistic p.value parameter method       alternative
##       <dbl>   <dbl>     <dbl> <chr>        <chr>      
## 1      25.2   0.564        27 White's Test greater
        # P value (0.564)> any significance level,  
        # fail to reject the null hypothesis 
        # Full Model no. 3 is homoscedastic.


### Checking for outliers 

ols_plot_resid_stand(fullmodel3)

        #7 outliers: 11, 17, 35, 36, 82, 90, 96

### Test for stability (do we need a structural break?)

#1st graphical inspection

table(oecd)
## oecd
##  0  1 
## 74 36
mean(oecd)
## [1] 0.3272727
oecdplot <- ggplot(data = satisf, aes(x = gdp, y = satis, colour=factor(oecd)))

oecdplot + stat_smooth(method = lm, fullrange = FALSE) + 
        labs(title = 'Satisfaction versus GDP', 
             x = 'GDP', 
             y = 'Satisfaction') +
        scale_colour_discrete(name  ="OECD Status",
                              breaks = c("0", "1"),
                              labels=c("Non-OECD", "OECD"))
## `geom_smooth()` using formula 'y ~ x'

#2nd, chow test 

oecdyes <- filter(satisf, oecd == 1)
oecdno <- filter(satisf, oecd == 0)

y1 <- matrix(c(oecdyes[, "SATISF"]), byrow=TRUE, ncol=1)
y2 <- matrix(c(oecdno[, "SATISF"]), byrow=TRUE, ncol=1)

x1 <- matrix(c(oecdyes[, "GDP"]), byrow=TRUE, ncol=1)
x2 <- matrix(c(oecdno[, "GDP"]), byrow=TRUE, ncol=1)

chow.test(y1, x1, y2, x2)
##      F value        d.f.1        d.f.2      P value 
## 1.080144e+01 2.000000e+00 1.060000e+02 5.377815e-05
# P value 5.377815e-05 significant at 1%
# reject null hypothesis 
# a break between OECD and non OECD is significant