# Load the project data set.
# NOTE(review): the path points into a per-user browser temp/download folder,
# so the script only runs on the original author's machine as written.
Mig <- read.csv(
  "C:/Users/Cassandra Carter/AppData/Local/Packages/Microsoft.MicrosoftEdge_8wekyb3d8bbwe/TempState/Downloads/Project.csv"
)

# Expose the columns of Mig as bare names; the rest of the script refers to
# Percent, Life, Crime, ... without a data-frame prefix.
# NOTE(review): attach() is fragile (name masking); passing `data = Mig` to
# the modelling calls would be the safer long-term design.
attach(Mig)

# Log-transformed variables.
LogPercent <- log(Percent)
LogLife <- log(Life)
LogLit <- log(Literacy)

# Square-root-transformed variables.
SqrtCrime <- Crime^0.5
SqrtGDP <- GDP^0.5
# 1. Original Life Plot
# Scatterplot of the untransformed relationship.
plot(
  Percent ~ Life,
  xlab = "Life Expectancy",
  ylab = "Percent Migrant Stock",
  main = "Life Expectancy vs. Percent Migrant Stock"
)

# Simple regression on the log-log scale. LogLife and LogPercent are the
# transformed vectors built at the top of the script, so they are not
# recomputed here.
mymod4 <- lm(LogPercent ~ LogLife)
plot(
  LogPercent ~ LogLife,
  xlab = "Log of Life Expectancy",
  ylab = "Log of Percent Migrant Stock",
  main = "Transformed Life Expectancy vs. Percent Migrant Stock"
)

# norm2 is the same fit as mymod4; reuse it instead of fitting it twice.
norm2 <- mymod4

# Residuals plotted against observation index, with a reference line at zero.
# NOTE(review): the title says "Normality", but a residual-vs-index plot does
# not assess normality; a Q-Q plot or histogram would. Title left as written.
plot(
  resid(norm2),
  xlab = "Countries in Alphabetical Order",
  ylab = "Residuals After Transformation",
  main = "Normality of Residuals"
)
abline(h = 0)
# Baseline multiple regression of log percent migrant stock on the
# (transformed) predictors.
# The formula previously listed LogLife twice; the repeated term had no effect
# on the fit (the summary shows a single LogLife coefficient), so it is
# removed here.
# NOTE(review): LogLit is computed at the top of the script but never used in
# any model; the second LogLife may have been meant to be LogLit. Confirm with
# the author before adding it, since that would change every result below.
MLRM <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water
)

# Standard lm diagnostic plots.
plot(MLRM)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
summary(MLRM)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic +
## SqrtCrime + SqrtGDP + Water)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.03404 -0.54939 0.02699 0.69758 2.66842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.804176 9.306087 -0.839 0.403630
## UER 3.541327 1.041040 3.402 0.000954 ***
## LogLife 0.549107 2.129142 0.258 0.796998
## EconomicLow -0.001731 0.759858 -0.002 0.998187
## EconomicMiddle -0.238985 0.374691 -0.638 0.525007
## SqrtCrime -0.089829 0.116960 -0.768 0.444224
## SqrtGDP 0.017305 0.003374 5.130 1.37e-06 ***
## Water -0.014142 0.011292 -1.252 0.213235
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.1 on 103 degrees of freedom
## (112 observations deleted due to missingness)
## Multiple R-squared: 0.555, Adjusted R-squared: 0.5248
## F-statistic: 18.35 on 7 and 103 DF, p-value: 1.152e-15
# Baseline predictors plus the Economic-by-UER interaction.
# `Economic * UER` expands to both main effects (already present) and the
# UER:Economic interaction. The duplicated LogLife term and the redundant
# parentheses from the earlier version are dropped; the fitted model is
# unchanged.
MLRM2 <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    Economic * UER
)

# Standard lm diagnostic plots.
plot(MLRM2)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
summary(MLRM2)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic +
## SqrtCrime + SqrtGDP + Water + (Economic * UER))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.85518 -0.49589 -0.03293 0.69716 2.63793
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.729371 9.163369 -0.625 0.533
## UER 5.087724 4.257902 1.195 0.235
## LogLife 0.010270 2.109482 0.005 0.996
## EconomicLow 0.919656 0.923069 0.996 0.321
## EconomicMiddle -0.338921 0.538160 -0.630 0.530
## SqrtCrime -0.104548 0.114614 -0.912 0.364
## SqrtGDP 0.018553 0.003481 5.330 5.99e-07 ***
## Water -0.013281 0.011142 -1.192 0.236
## UER:EconomicLow -4.137983 4.503440 -0.919 0.360
## UER:EconomicMiddle 1.168387 4.436982 0.263 0.793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.076 on 101 degrees of freedom
## (112 observations deleted due to missingness)
## Multiple R-squared: 0.5827, Adjusted R-squared: 0.5455
## F-statistic: 15.67 on 9 and 101 DF, p-value: 9.936e-16
# `Original` is the baseline model without interactions. It is the same fit as
# MLRM above, so reuse that object instead of refitting an identical formula
# that would have to be kept in sync by hand.
Original <- MLRM

# Partial F-test: does adding the UER:Economic interaction improve the fit?
anova(Original, MLRM2)
## Analysis of Variance Table
##
## Model 1: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime +
## SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime +
## SqrtGDP + Water + (Economic * UER)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 103 124.74
## 2 101 116.99 2 7.7482 3.3445 0.03923 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Baseline predictors plus an Economic-by-Crime interaction.
# The duplicated LogLife term and redundant parentheses are dropped; the
# fitted model is unchanged.
# NOTE(review): `Economic * Crime` brings in the untransformed Crime main
# effect and Economic:Crime alongside SqrtCrime (hence 3 extra Df in the
# comparison below). If the interaction was meant to use the transformed
# variable, this should be `Economic * SqrtCrime` -- confirm with the author.
MLRM3 <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    Economic * Crime
)

# Partial F-test of the added Crime terms against the baseline model.
anova(Original, MLRM3)
## Analysis of Variance Table
##
## Model 1: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime +
## SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime +
## SqrtGDP + Water + (Economic * Crime)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 103 124.74
## 2 100 122.54 3 2.1976 0.5978 0.6179
# Squared versions of the numeric predictors, used as quadratic terms in the
# models below.
qUER <- UER^2
qLife <- Life^2
qLit <- Literacy^2
# No squared term is built for Economic: it is a factor (levels appear as
# EconomicLow / EconomicMiddle in the model summaries), so `Economic^2` only
# produced NAs with a "'^' not meaningful for factors" warning, and it would
# be an error if the column were read as character (the read.csv default
# since R 4.0). The resulting qEconomic was never used by any model.
qCrime <- Crime^2
qGDP <- GDP^2
qWater <- Water^2
# Baseline predictors plus the squared terms.
# The duplicated LogLife term from the earlier version is dropped; the fitted
# model is unchanged.
# NOTE(review): qLit enters without any first-order literacy term -- confirm
# that is intended.
quadMLRM <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    qUER + qLife + qLit + qCrime + qGDP + qWater
)
summary(quadMLRM)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLife + Economic +
## SqrtCrime + SqrtGDP + Water + qUER + qLife + qLit + qCrime +
## qGDP + qWater)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.94745 -0.53398 -0.00002 0.60617 2.59290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.426e+01 5.052e+01 -0.678 0.499291
## UER 8.193e+00 2.257e+00 3.629 0.000456 ***
## LogLife 7.248e+00 1.352e+01 0.536 0.593198
## EconomicLow 7.251e-01 8.326e-01 0.871 0.385988
## EconomicMiddle -2.826e-01 4.107e-01 -0.688 0.492963
## SqrtCrime -7.143e-03 3.130e-01 -0.023 0.981838
## SqrtGDP 2.102e-02 6.044e-03 3.478 0.000759 ***
## Water 5.860e-01 1.573e+00 0.373 0.710224
## qUER -6.753e+00 2.853e+00 -2.367 0.019935 *
## qLife -9.162e-04 1.324e-03 -0.692 0.490442
## qLit 1.115e+00 9.840e-01 1.133 0.260001
## qCrime -5.174e-05 2.461e-04 -0.210 0.833892
## qGDP -6.374e-11 1.055e-10 -0.604 0.547102
## qWater -5.919e-03 1.557e-02 -0.380 0.704659
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.081 on 97 degrees of freedom
## (112 observations deleted due to missingness)
## Multiple R-squared: 0.5955, Adjusted R-squared: 0.5413
## F-statistic: 10.98 on 13 and 97 DF, p-value: 4.996e-14
# Full candidate model: baseline predictors, squared terms, and the
# Economic-by-UER interaction. This is the starting point for backward
# selection.
# Fixed from the earlier version: a stray `++` before qLit and a duplicated
# LogLife term (both were harmless to the fit but were typos), plus redundant
# parentheses around the interaction. Term order is kept the same.
totalmod <- lm(
  LogPercent ~ UER + LogLife + Economic + SqrtCrime + SqrtGDP + Water +
    qUER + qLit + qCrime + qLife + qGDP + qWater + Economic * UER
)

# Backward elimination by AIC; the trace and the selected model are printed.
library(MASS)
stepAIC(totalmod, direction = "backward")
## Start: AIC=30.72
## LogPercent ~ UER + LogLife + LogLife + Economic + SqrtCrime +
## SqrtGDP + Water + qUER + +qLit + qCrime + qLife + qGDP +
## qWater + (Economic * UER)
##
## Df Sum of Sq RSS AIC
## - SqrtCrime 1 0.0442 109.77 28.764
## - Water 1 0.1105 109.84 28.831
## - qWater 1 0.1166 109.84 28.837
## - LogLife 1 0.1674 109.89 28.888
## - qCrime 1 0.2589 109.98 28.981
## - qUER 1 0.2622 109.99 28.984
## - qLife 1 0.3829 110.11 29.106
## - qGDP 1 1.2571 110.98 29.983
## - UER:Economic 2 3.6742 113.40 30.375
## - qLit 1 1.6919 111.42 30.417
## <none> 109.73 30.719
## - SqrtGDP 1 16.1106 125.84 43.926
##
## Step: AIC=28.76
## LogPercent ~ UER + LogLife + Economic + SqrtGDP + Water + qUER +
## qLit + qCrime + qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - Water 1 0.1187 109.89 26.884
## - qWater 1 0.1250 109.90 26.890
## - LogLife 1 0.1814 109.95 26.947
## - qUER 1 0.2612 110.03 27.027
## - qLife 1 0.4056 110.18 27.173
## - qCrime 1 0.6954 110.47 27.465
## - qGDP 1 1.4306 111.20 28.201
## - UER:Economic 2 3.6306 113.40 28.376
## - qLit 1 1.6553 111.43 28.425
## <none> 109.77 28.764
## - SqrtGDP 1 16.4701 126.24 42.281
##
## Step: AIC=26.88
## LogPercent ~ UER + LogLife + Economic + SqrtGDP + qUER + qLit +
## qCrime + qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - LogLife 1 0.2618 110.15 25.148
## - qUER 1 0.2737 110.16 25.160
## - qLife 1 0.4956 110.39 25.383
## - qCrime 1 0.9205 110.81 25.810
## - qWater 1 1.5994 111.49 26.488
## - qGDP 1 1.6161 111.50 26.504
## - UER:Economic 2 3.6737 113.56 26.534
## - qLit 1 1.8789 111.77 26.765
## <none> 109.89 26.884
## - SqrtGDP 1 17.5869 127.48 41.362
##
## Step: AIC=25.15
## LogPercent ~ UER + Economic + SqrtGDP + qUER + qLit + qCrime +
## qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qUER 1 0.3433 110.49 23.493
## - qCrime 1 0.9368 111.09 24.088
## - qLife 1 1.3133 111.46 24.463
## - qGDP 1 1.5798 111.73 24.728
## - qWater 1 1.7476 111.90 24.895
## - UER:Economic 2 3.8800 114.03 24.990
## <none> 110.15 25.148
## - qLit 1 2.5320 112.68 25.670
## - SqrtGDP 1 17.3274 127.48 39.364
##
## Step: AIC=23.49
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qCrime + qLife +
## qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qCrime 1 0.8697 111.36 22.363
## - qLife 1 1.1699 111.66 22.662
## - qGDP 1 1.3296 111.82 22.821
## - qWater 1 1.7438 112.24 23.231
## <none> 110.49 23.493
## - qLit 1 2.6404 113.14 24.114
## - UER:Economic 2 10.2377 120.73 29.329
## - SqrtGDP 1 17.2124 127.71 37.563
##
## Step: AIC=22.36
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qLife + qGDP +
## qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qLife 1 0.9352 112.30 21.292
## - qGDP 1 1.3929 112.76 21.743
## - qWater 1 1.7320 113.10 22.076
## <none> 111.36 22.363
## - qLit 1 3.1563 114.52 23.466
## - UER:Economic 2 9.9066 121.27 27.823
## - SqrtGDP 1 18.4282 129.79 37.361
##
## Step: AIC=21.29
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qGDP + qWater +
## UER:Economic
##
## Df Sum of Sq RSS AIC
## - qGDP 1 1.0929 113.39 20.367
## - qWater 1 2.0149 114.31 21.266
## <none> 112.30 21.292
## - qLit 1 2.4776 114.78 21.714
## - UER:Economic 2 9.2791 121.58 26.104
## - SqrtGDP 1 17.6012 129.90 35.453
##
## Step: AIC=20.37
## LogPercent ~ UER + Economic + SqrtGDP + qLit + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qWater 1 1.8297 115.22 20.143
## <none> 113.39 20.367
## - qLit 1 4.5637 117.96 22.747
## - UER:Economic 2 9.0155 122.41 24.859
## - SqrtGDP 1 30.7983 144.19 45.038
##
## Step: AIC=20.14
## LogPercent ~ UER + Economic + SqrtGDP + qLit + UER:Economic
##
## Df Sum of Sq RSS AIC
## <none> 115.22 20.143
## - qLit 1 4.429 119.65 22.330
## - UER:Economic 2 9.141 124.36 24.617
## - SqrtGDP 1 32.303 147.53 45.576
##
## Call:
## lm(formula = LogPercent ~ UER + Economic + SqrtGDP + qLit + UER:Economic)
##
## Coefficients:
## (Intercept) UER EconomicLow
## -7.62590 4.70544 1.51174
## EconomicMiddle SqrtGDP qLit
## -0.33526 0.01757 1.57743
## UER:EconomicLow UER:EconomicMiddle
## -4.33282 1.44678
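# Fitted equation as recorded by the author. NOTE(review): these coefficients
# match neither the stepAIC result above nor summary(finalmod) below, so they
# appear to come from a different run -- re-derive before reporting.
# LogPercent = -13.80696 + 4.87037*UER - 8.26776*LogLife + 1.33476*EconomicLow
#              - 0.25542*EconomicMiddle + 0.01764*SqrtGDP + 7.80352*qLit
#              - 4.07237*(UER:EconomicLow) + 1.60495*(UER:EconomicMiddle)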
library(faraway)
## Warning: package 'faraway' was built under R version 3.4.4
# Final model: UER, SqrtGDP, LogLife, qLit, Economic and the UER-by-Economic
# interaction.
# NOTE(review): the backward stepAIC run above ended without LogLife; it is
# included again here -- confirm that is deliberate.
finalmod <- lm(
  LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + (UER * Economic)
)

# Standard lm diagnostic plots.
plot(finalmod)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Variance inflation factors (vif() comes from the faraway package loaded
# above).
vif(finalmod)
## UER SqrtGDP LogLife
## 21.521507 4.503508 2.783962
## qLit EconomicLow EconomicMiddle
## 2.101997 3.594519 6.741804
## UER:EconomicLow UER:EconomicMiddle
## 16.162785 14.575499

# Coefficient table and fit statistics for the final model.
summary(finalmod)
##
## Call:
## lm(formula = LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic +
## (UER * Economic))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90498 -0.62212 -0.05181 0.60017 2.64431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.591266 9.189367 -0.064 0.9488
## UER 4.880953 4.157863 1.174 0.2432
## SqrtGDP 0.018234 0.003387 5.383 4.7e-07 ***
## LogLife -1.696754 2.204654 -0.770 0.4433
## qLit 1.831247 0.860085 2.129 0.0356 *
## EconomicLow 1.455965 0.919504 1.583 0.1164
## EconomicMiddle -0.365105 0.524940 -0.696 0.4883
## UER:EconomicLow -4.774532 4.390180 -1.088 0.2794
## UER:EconomicMiddle 1.219070 4.340690 0.281 0.7794
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.06 on 102 degrees of freedom
## (112 observations deleted due to missingness)
## Multiple R-squared: 0.5913, Adjusted R-squared: 0.5593
## F-statistic: 18.45 on 8 and 102 DF, p-value: < 2.2e-16
# Histogram of the final model's residuals.
hist(
  residuals(finalmod),
  main = "Histogram of Residuals",
  xlab = "Residuals of the Final Model"
)