# Read the project data set from its CSV export.
# NOTE(review): this is an absolute, machine-specific path into a browser
# temp-download folder -- confirm it still exists before re-running.
Mig <- read.csv(
  file = "C:/Users/Cassandra Carter/AppData/Local/Packages/Microsoft.MicrosoftEdge_8wekyb3d8bbwe/TempState/Downloads/Project.csv"
)

# Put the columns of Mig on the search path; the rest of the script refers to
# them by bare name (Percent, Life, Crime, ...).
# NOTE(review): attach() is fragile (name masking); passing `data = Mig` to
# the model calls would be safer, but that needs a script-wide change.
attach(Mig)

# Log-transformed columns.
LogPercent <- log(Percent)
LogLife <- log(Life)
LogLit <- log(Literacy)

# Square-root-transformed columns.
SqrtCrime <- Crime^0.5
SqrtGDP <- GDP^0.5

1. Original Life Plot

# Scatter plot of the untransformed response against life expectancy.
plot(
  Percent ~ Life,
  main = "Life Expectancy vs. Percent Migrant Stock",
  xlab = "Life Expectancy",
  ylab = "Percent Migrant Stock"
)

  1. Log of Life plot to show the changes
# Simple regression of the log response on log life expectancy, plus the
# matching scatter plot. LogLife and LogPercent are already computed where the
# data are loaded at the top of the script, so they are not recomputed here.
mymod4 <- lm(LogPercent ~ LogLife)
plot(
  LogPercent ~ LogLife,
  main = "Transformed Life Expectancy vs. Percent Migrant Stock",
  xlab = "Log of Life Expectancy",
  ylab = "Log of Percent Migrant Stock"
)

  1. Resids of Log of Life to show normality
# Fit the log-log simple regression and plot its residuals in row order with
# a horizontal reference line at zero.
# NOTE(review): an index plot shows scatter about zero rather than normality;
# a normal Q-Q plot may be what the title intends -- confirm.
norm2 <- lm(LogPercent ~ LogLife)
plot(
  resid(norm2),
  main = "Normality of Residuals",
  xlab = "Countries in Alphabetical Order",
  ylab = "Residuals After Transformation"
)
abline(h = 0)

  1. Building the full model
# Full first-order model for the log response, followed by the default lm
# diagnostic plots and the coefficient summary.
# NOTE(review): the formula previously listed LogLife twice. R drops repeated
# terms, so removing the duplicate leaves the fit unchanged. LogLit is defined
# earlier but does not appear in any model in this script, so the second term
# may have been meant to be LogLit -- confirm before changing the model.
MLRM <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water
)
plot(MLRM)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

summary(MLRM)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic + 
##     SqrtCrime + SqrtGDP + Water)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.03404 -0.54939  0.02699  0.69758  2.66842 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -7.804176   9.306087  -0.839 0.403630    
## UER             3.541327   1.041040   3.402 0.000954 ***
## LogLife         0.549107   2.129142   0.258 0.796998    
## EconomicLow    -0.001731   0.759858  -0.002 0.998187    
## EconomicMiddle -0.238985   0.374691  -0.638 0.525007    
## SqrtCrime      -0.089829   0.116960  -0.768 0.444224    
## SqrtGDP         0.017305   0.003374   5.130 1.37e-06 ***
## Water          -0.014142   0.011292  -1.252 0.213235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.1 on 103 degrees of freedom
##   (112 observations deleted due to missingness)
## Multiple R-squared:  0.555,  Adjusted R-squared:  0.5248 
## F-statistic: 18.35 on 7 and 103 DF,  p-value: 1.152e-15
  1. Interaction with Econ and UER
# First-order model plus the UER-by-Economic interaction, followed by the
# default lm diagnostic plots and the coefficient summary.
# The formula previously listed LogLife twice and added the interaction as
# (Economic*UER), which re-specifies main effects that are already present.
# R drops repeated terms, so this spelling fits the same model.
# NOTE(review): the duplicated LogLife may have been meant to be LogLit --
# confirm before changing the model.
MLRM2 <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    UER:Economic
)
plot(MLRM2)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

summary(MLRM2)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic + 
##     SqrtCrime + SqrtGDP + Water + (Economic * UER))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.85518 -0.49589 -0.03293  0.69716  2.63793 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -5.729371   9.163369  -0.625    0.533    
## UER                 5.087724   4.257902   1.195    0.235    
## LogLife             0.010270   2.109482   0.005    0.996    
## EconomicLow         0.919656   0.923069   0.996    0.321    
## EconomicMiddle     -0.338921   0.538160  -0.630    0.530    
## SqrtCrime          -0.104548   0.114614  -0.912    0.364    
## SqrtGDP             0.018553   0.003481   5.330 5.99e-07 ***
## Water              -0.013281   0.011142  -1.192    0.236    
## UER:EconomicLow    -4.137983   4.503440  -0.919    0.360    
## UER:EconomicMiddle  1.168387   4.436982   0.263    0.793    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.076 on 101 degrees of freedom
##   (112 observations deleted due to missingness)
## Multiple R-squared:  0.5827, Adjusted R-squared:  0.5455 
## F-statistic: 15.67 on 9 and 101 DF,  p-value: 9.936e-16
  1. ANOVA: testing the Econ and UER interaction
# Baseline for the nested-model F tests. The formula fitted here before was
# the same as the one used for MLRM (including its duplicated LogLife term),
# so the existing fit is reused instead of refitting.
Original <- MLRM

# F test of the first-order model against the model with the UER-by-Economic
# interaction.
anova(Original, MLRM2)
## Analysis of Variance Table
## 
## Model 1: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime + 
##     SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime + 
##     SqrtGDP + Water + (Economic * UER)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1    103 124.74                              
## 2    101 116.99  2    7.7482 3.3445 0.03923 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Interaction with Econ and Crime
# First-order model plus an Economic-by-Crime interaction, tested against the
# first-order model.
# The formula previously listed LogLife twice and added the interaction as
# (Economic*Crime). Because the main effect in the model is SqrtCrime, that
# expansion also added a raw Crime main effect; it is written out explicitly
# here so the fitted model is unchanged.
# NOTE(review): having both SqrtCrime and Crime in the model is probably
# unintended -- Economic:SqrtCrime may be what was meant. Confirm.
MLRM3 <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    Crime + Economic:Crime
)
anova(Original, MLRM3)
## Analysis of Variance Table
## 
## Model 1: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime + 
##     SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime + 
##     SqrtGDP + Water + (Economic * Crime)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    103 124.74                           
## 2    100 122.54  3    2.1976 0.5978 0.6179
  1. Quad terms
# Squared versions of the numeric predictors.
# Economic is categorical, so it gets no squared term: `Economic^2` on a
# factor only produces a warning and NA values, and no model in this script
# used the result.
qUER <- UER^2
qLife <- Life^2
qLit <- Literacy^2
qCrime <- Crime^2
qGDP <- GDP^2
qWater <- Water^2

# First-order model plus the squared terms.
# The formula previously listed LogLife twice; R drops repeated terms, so the
# fit is unchanged.
# NOTE(review): the duplicate may have been meant to be LogLit -- confirm.
quadMLRM <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    qUER + qLife + qLit + qCrime + qGDP + qWater
)
summary(quadMLRM)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic + 
##     SqrtCrime + SqrtGDP + Water + qUER + qLife + qLit + qCrime + 
##     qGDP + qWater)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.94745 -0.53398 -0.00002  0.60617  2.59290 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.426e+01  5.052e+01  -0.678 0.499291    
## UER             8.193e+00  2.257e+00   3.629 0.000456 ***
## LogLife         7.248e+00  1.352e+01   0.536 0.593198    
## EconomicLow     7.251e-01  8.326e-01   0.871 0.385988    
## EconomicMiddle -2.826e-01  4.107e-01  -0.688 0.492963    
## SqrtCrime      -7.143e-03  3.130e-01  -0.023 0.981838    
## SqrtGDP         2.102e-02  6.044e-03   3.478 0.000759 ***
## Water           5.860e-01  1.573e+00   0.373 0.710224    
## qUER           -6.753e+00  2.853e+00  -2.367 0.019935 *  
## qLife          -9.162e-04  1.324e-03  -0.692 0.490442    
## qLit            1.115e+00  9.840e-01   1.133 0.260001    
## qCrime         -5.174e-05  2.461e-04  -0.210 0.833892    
## qGDP           -6.374e-11  1.055e-10  -0.604 0.547102    
## qWater         -5.919e-03  1.557e-02  -0.380 0.704659    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.081 on 97 degrees of freedom
##   (112 observations deleted due to missingness)
## Multiple R-squared:  0.5955, Adjusted R-squared:  0.5413 
## F-statistic: 10.98 on 13 and 97 DF,  p-value: 4.996e-14
  1. AIC
library(MASS)

# Largest candidate model: first-order terms, squared terms and the
# UER-by-Economic interaction. Used as the starting point for backward
# selection by AIC.
# The formula previously listed LogLife twice, contained a stray "++" before
# qLit, and added the interaction as (Economic*UER); this spelling fits the
# same model with the same term order.
totalmod <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    qUER + qLit + qCrime + qLife + qGDP + qWater + UER:Economic
)

# Backward elimination; the trace and the selected model are printed.
stepAIC(totalmod, direction = "backward")
## Start:  AIC=30.72
## LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime + 
##     SqrtGDP + Water + qUER + +qLit + qCrime + qLife + qGDP + 
##     qWater + (Economic * UER)
## 
##                Df Sum of Sq    RSS    AIC
## - SqrtCrime     1    0.0442 109.77 28.764
## - Water         1    0.1105 109.84 28.831
## - qWater        1    0.1166 109.84 28.837
## - LogLife       1    0.1674 109.89 28.888
## - qCrime        1    0.2589 109.98 28.981
## - qUER          1    0.2622 109.99 28.984
## - qLife         1    0.3829 110.11 29.106
## - qGDP          1    1.2571 110.98 29.983
## - UER:Economic  2    3.6742 113.40 30.375
## - qLit          1    1.6919 111.42 30.417
## <none>                      109.73 30.719
## - SqrtGDP       1   16.1106 125.84 43.926
## 
## Step:  AIC=28.76
## LogPercent ~ UER + LogLife + Economic + SqrtGDP + Water + qUER + 
##     qLit + qCrime + qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - Water         1    0.1187 109.89 26.884
## - qWater        1    0.1250 109.90 26.890
## - LogLife       1    0.1814 109.95 26.947
## - qUER          1    0.2612 110.03 27.027
## - qLife         1    0.4056 110.18 27.173
## - qCrime        1    0.6954 110.47 27.465
## - qGDP          1    1.4306 111.20 28.201
## - UER:Economic  2    3.6306 113.40 28.376
## - qLit          1    1.6553 111.43 28.425
## <none>                      109.77 28.764
## - SqrtGDP       1   16.4701 126.24 42.281
## 
## Step:  AIC=26.88
## LogPercent ~ UER + LogLife + Economic + SqrtGDP + qUER + qLit + 
##     qCrime + qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - LogLife       1    0.2618 110.15 25.148
## - qUER          1    0.2737 110.16 25.160
## - qLife         1    0.4956 110.39 25.383
## - qCrime        1    0.9205 110.81 25.810
## - qWater        1    1.5994 111.49 26.488
## - qGDP          1    1.6161 111.50 26.504
## - UER:Economic  2    3.6737 113.56 26.534
## - qLit          1    1.8789 111.77 26.765
## <none>                      109.89 26.884
## - SqrtGDP       1   17.5869 127.48 41.362
## 
## Step:  AIC=25.15
## LogPercent ~ UER + Economic + SqrtGDP + qUER + qLit + qCrime + 
##     qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qUER          1    0.3433 110.49 23.493
## - qCrime        1    0.9368 111.09 24.088
## - qLife         1    1.3133 111.46 24.463
## - qGDP          1    1.5798 111.73 24.728
## - qWater        1    1.7476 111.90 24.895
## - UER:Economic  2    3.8800 114.03 24.990
## <none>                      110.15 25.148
## - qLit          1    2.5320 112.68 25.670
## - SqrtGDP       1   17.3274 127.48 39.364
## 
## Step:  AIC=23.49
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qCrime + qLife + 
##     qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qCrime        1    0.8697 111.36 22.363
## - qLife         1    1.1699 111.66 22.662
## - qGDP          1    1.3296 111.82 22.821
## - qWater        1    1.7438 112.24 23.231
## <none>                      110.49 23.493
## - qLit          1    2.6404 113.14 24.114
## - UER:Economic  2   10.2377 120.73 29.329
## - SqrtGDP       1   17.2124 127.71 37.563
## 
## Step:  AIC=22.36
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qLife + qGDP + 
##     qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qLife         1    0.9352 112.30 21.292
## - qGDP          1    1.3929 112.76 21.743
## - qWater        1    1.7320 113.10 22.076
## <none>                      111.36 22.363
## - qLit          1    3.1563 114.52 23.466
## - UER:Economic  2    9.9066 121.27 27.823
## - SqrtGDP       1   18.4282 129.79 37.361
## 
## Step:  AIC=21.29
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qGDP + qWater + 
##     UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qGDP          1    1.0929 113.39 20.367
## - qWater        1    2.0149 114.31 21.266
## <none>                      112.30 21.292
## - qLit          1    2.4776 114.78 21.714
## - UER:Economic  2    9.2791 121.58 26.104
## - SqrtGDP       1   17.6012 129.90 35.453
## 
## Step:  AIC=20.37
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qWater        1    1.8297 115.22 20.143
## <none>                      113.39 20.367
## - qLit          1    4.5637 117.96 22.747
## - UER:Economic  2    9.0155 122.41 24.859
## - SqrtGDP       1   30.7983 144.19 45.038
## 
## Step:  AIC=20.14
## LogPercent ~ UER + Economic + SqrtGDP + qLit + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## <none>                      115.22 20.143
## - qLit          1     4.429 119.65 22.330
## - UER:Economic  2     9.141 124.36 24.617
## - SqrtGDP       1    32.303 147.53 45.576
## 
## Call:
## lm(formula = LogPercent ~ UER + Economic + SqrtGDP + qLit + UER:Economic)
## 
## Coefficients:
##        (Intercept)                 UER         EconomicLow  
##           -7.62590             4.70544             1.51174  
##     EconomicMiddle             SqrtGDP                qLit  
##           -0.33526             0.01757             1.57743  
##    UER:EconomicLow  UER:EconomicMiddle  
##           -4.33282             1.44678

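$$\widehat{\text{LogPercent}} = -13.80696 + 4.87037\,\text{UER} - 8.26776\,\text{LogLife} + 1.33476\,\text{EconLow} - 0.25542\,\text{EconMid} + 0.01764\,\text{SqrtGDP} + 7.80352\,\text{qLit} - 4.07237\,(\text{UER} \times \text{EconLow}) + 1.60495\,(\text{UER} \times \text{EconMid})$$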

  1. VIF
library(faraway)
## Warning: package 'faraway' was built under R version 3.4.4
# Final model: default lm diagnostic plots, then variance inflation factors
# (vif() comes from the faraway package loaded above).
# The formula previously ended with `Economic + (UER*Economic)`, which repeats
# main effects already listed; `UER:Economic` fits the same model.
# NOTE(review): LogLife is included here although the backward stepAIC trace
# above dropped it -- confirm this is intentional.
finalmod <- lm(
  LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + UER:Economic
)
plot(finalmod)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

vif(finalmod)
##                UER            SqrtGDP            LogLife 
##          21.521507           4.503508           2.783962 
##               qLit        EconomicLow     EconomicMiddle 
##           2.101997           3.594519           6.741804 
##    UER:EconomicLow UER:EconomicMiddle 
##          16.162785          14.575499
# Print the coefficient table and fit statistics for the final model.
summary(finalmod)
## 
## Call:
## lm(formula = LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + 
##     (UER * Economic))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.90498 -0.62212 -0.05181  0.60017  2.64431 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.591266   9.189367  -0.064   0.9488    
## UER                 4.880953   4.157863   1.174   0.2432    
## SqrtGDP             0.018234   0.003387   5.383  4.7e-07 ***
## LogLife            -1.696754   2.204654  -0.770   0.4433    
## qLit                1.831247   0.860085   2.129   0.0356 *  
## EconomicLow         1.455965   0.919504   1.583   0.1164    
## EconomicMiddle     -0.365105   0.524940  -0.696   0.4883    
## UER:EconomicLow    -4.774532   4.390180  -1.088   0.2794    
## UER:EconomicMiddle  1.219070   4.340690   0.281   0.7794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.06 on 102 degrees of freedom
##   (112 observations deleted due to missingness)
## Multiple R-squared:  0.5913, Adjusted R-squared:  0.5593 
## F-statistic: 18.45 on 8 and 102 DF,  p-value: < 2.2e-16
# Histogram of the final model's residuals.
hist(
  residuals(finalmod),
  main = "Histogram of Residuals",
  xlab = "Residuals of the Final Model"
)