Conceptual points








Exercises in R

# Packages used throughout these notes. Attaching them here lets the code run
# in a fresh R session instead of relying on whatever is already loaded.
library(stargazer)  # stargazer(): regression tables
library(ggeffects)  # ggpredict(): model-based predictions
library(ggplot2)    # ggplot(), geom_histogram(), axis/title labels
library(magrittr)   # %>% pipe

# NOTE(review): machine-specific absolute path -- this only resolves on the
# original author's computer. Kept as-is so load() below finds the data file;
# consider an RStudio project or a relative path instead.
setwd('/Users/davidhamad/Documents/Cand.Scient.Pol/2. Semester/Statistical models beyond linear regression - applied statistics for political scientists/3) Linear regression')

# Expected to create the object `MEP2014` in the workspace (it is used below).
load("MEP2014.rda")

df <- MEP2014

# Baseline model: local staff size on ballot type and labour cost.
mod2 <- lm(LocalAssistants ~ OpenList + LaborCost, data = df)

# Same model plus the party's seat share in the national parliament.
# SeatsNatPal.prop contains missing values, and lm() drops incomplete rows by
# default, so this model is estimated on fewer observations than mod2
# (722 vs 739 in the table below) -- the two columns are not fit on the same
# sample.
mod2.party <- lm(
  LocalAssistants ~ OpenList + LaborCost + SeatsNatPal.prop,
  data = df
)

# Side-by-side comparison of both models, printed as plain text.
stargazer(mod2, mod2.party, type = "text")

===================================================================
                                  Dependent variable:              
                    -----------------------------------------------
                                    LocalAssistants                
                              (1)                     (2)          
-------------------------------------------------------------------
OpenList                   0.829***                0.937***        
                            (0.228)                 (0.227)        
                                                                   
LaborCost                  -0.070***               -0.068***       
                            (0.010)                 (0.010)        
                                                                   
SeatsNatPal.prop                                    -0.184         
                                                    (0.625)        
                                                                   
Constant                   4.127***                4.057***        
                            (0.286)                 (0.352)        
                                                                   
-------------------------------------------------------------------
Observations                  739                     722          
R2                           0.081                   0.085         
Adjusted R2                  0.079                   0.081         
Residual Std. Error    3.083 (df = 736)        3.009 (df = 718)    
F Statistic         32.612*** (df = 2; 736) 22.200*** (df = 3; 718)
===================================================================
Note:                                   *p<0.1; **p<0.05; ***p<0.01


# Five-number summary (plus mean and NA count) of national party seat share.
summary(df[["SeatsNatPal.prop"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
0.00000 0.08769 0.29738 0.26110 0.40426 0.66834      17 
# Predictions from mod2.party at SeatsNatPal.prop = 0.09 and 0.40, i.e. close
# to the 1st and 3rd quartiles reported by summary() above.
eff <- ggpredict(mod2.party, terms = "SeatsNatPal.prop [0.09, 0.40]")

# Predictions across the values of SeatsNatPal.prop chosen by ggpredict().
eff_full <- ggpredict(mod2.party, terms = "SeatsNatPal.prop")

# Plot the prediction curve and attach all labels in a single labs() call.
(eff_full %>% plot()) +
  labs(
    y = "Predicted local staff size",
    x = "Party size in national parliament (proportion)",
    title = "Effect of national party size on MEP local staff",
    subtitle = "Controlling for OpenList and LaborCost"
  )


Fundamental variation

# Two nested specifications: ballot type only, then ballot type + labour cost.
mod1 <- lm(LocalAssistants ~ OpenList, data = df)
mod2 <- lm(LocalAssistants ~ OpenList + LaborCost, data = df)

# Keep each model's residuals alongside the data.
df[["resid_mod1"]] <- resid(mod1)
df[["resid_mod2"]] <- resid(mod2)

# Distribution of what Model 1 leaves unexplained.
ggplot(data.frame(r = resid(mod1)), aes(x = r)) +
  geom_histogram(bins = 30) +
  labs(title = "Residuals: Model 1 (OpenList only)")


# Same view for Model 2.
ggplot(data.frame(r = resid(mod2)), aes(x = r)) +
  geom_histogram(bins = 30) +
  labs(title = "Residuals: Model 2 (OpenList + LaborCost)")


# Mean of the Model 1 residuals: zero up to floating-point error (order 1e-16).
mean(residuals(mod1))
[1] 4.254603e-16
# Spread of the Model 1 residuals.
sd(residuals(mod1))
[1] 3.176834
# Mean of the Model 2 residuals: again numerically zero.
mean(residuals(mod2))
[1] 5.910172e-16
# Spread of the Model 2 residuals. sd() divides by n - 1, so this is slightly
# below the residual standard error of 3.083 (df = 736) in the regression table.
sd(residuals(mod2))    
[1] 3.078466


# Estimated variance-covariance matrix of the Model 2 coefficient estimates:
# variances on the diagonal, covariances between estimates off the diagonal.
vcov(mod2)
             (Intercept)      OpenList     LaborCost
(Intercept)  0.081867230 -0.0284615942 -0.0024363118
OpenList    -0.028461594  0.0519105737  0.0001759822
LaborCost   -0.002436312  0.0001759822  0.0001031140

# Square roots of the diagonal are the coefficient standard errors; they match
# column (1) of the regression table (0.286, 0.228, 0.010).
sqrt(diag(vcov(mod2)))
(Intercept)    OpenList   LaborCost 
 0.28612450  0.22783892  0.01015451 

# Rescale the covariance matrix to correlations between the coefficient
# estimates (e.g. intercept and LaborCost estimates correlate at about -0.84).
cov2cor(vcov(mod2))
            (Intercept)    OpenList   LaborCost
(Intercept)   1.0000000 -0.43659249 -0.83853084
OpenList     -0.4365925  1.00000000  0.07606449
LaborCost    -0.8385308  0.07606449  1.00000000

