# Read the tab-separated data set from the system clipboard (the first row
# holds the column names) and show the first few rows as a sanity check.
# read.delim() defaults to header = TRUE and sep = "\t".
Mig <- read.delim("clipboard")
head(Mig)
## Country Crime Literacy Economic GDP Stock Percent Water Life
## 1 Albania 39.87 0.976 Middle 12500 52484 0.018 0.95 78.5
## 2 Algeria 50.68 0.802 Middle 15100 248624 0.006 0.84 77.0
## 3 Argentina 61.79 0.981 Middle 20700 2164524 0.049 0.99 77.3
## 4 Armenia 28.28 0.997 Middle 9100 190719 0.065 1.00 74.8
## 5 Austrailia 42.55 0.990 High 49900 7035560 0.288 1.00 82.3
## 6 Austria 20.41 0.980 High 49200 1660283 0.190 1.00 81.6
## UER
## 1 0.140
## 2 0.117
## 3 0.081
## 4 0.189
## 5 0.056
## 6 0.054
# Put the columns of Mig on the search path: every model formula further down
# refers to them by bare name (UER, Economic, Water, ...).
attach(Mig)

# Transformed variables used by the regressions below:
# square roots for Crime and GDP, natural logs for Percent, Life and Literacy.
SqrtCrime <- Crime^0.5
SqrtGDP <- GDP^0.5
LogPercent <- log(Percent)
LogLife <- log(Life)
LogLit <- log(Literacy)
# 1. Original Life Plot
# Life expectancy against migrant-stock share on the original scale.
plot(Percent ~ Life, xlab = "Life Expectancy", ylab = "Percent Migrant Stock")
title(main = "Life Expectancy vs. Percent Migrant Stock")

# Same relationship on the log-log scale. LogLife and LogPercent are already
# computed right after attach(Mig), so they are not recomputed here.
mymod4 <- lm(LogPercent ~ LogLife)
plot(
  LogPercent ~ LogLife,
  xlab = "Log of Life Expectancy",
  ylab = "Log of Percent Migrant Stock"
)
title(main = "Transformed Life Expectancy vs. Percent Migrant Stock")

# norm2 used to be a second, identical lm() fit of LogPercent ~ LogLife;
# reuse mymod4 rather than fitting the same model twice.
norm2 <- mymod4

# Residuals in row order, with a horizontal reference line at zero
# (abline(0, 0) is intercept 0, slope 0).
# NOTE(review): this is a residual-vs-index plot, which shows patterns and
# spread rather than normality -- confirm the title says what is intended.
plot(
  resid(norm2),
  xlab = "Countries in Alphabetical Order",
  ylab = "Residuals After Transformation"
)
abline(0, 0)
title(main = "Normality of Residuals")
# Main-effects multiple regression of log migrant-stock share on all of the
# (transformed) predictors. Economic is categorical, so it contributes one
# coefficient per non-reference level.
MLRM <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP + Water
)

# Standard lm diagnostic plots.
plot(MLRM)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Coefficient table and overall fit statistics.
summary(MLRM)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic +
## SqrtCrime + SqrtGDP + Water)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0948 -0.6075 0.0249 0.6891 2.6314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.483852 10.085319 -0.345 0.73048
## UER 3.229853 1.077474 2.998 0.00342 **
## LogLife -0.408754 2.296828 -0.178 0.85910
## LogLit 1.277120 1.156095 1.105 0.27190
## EconomicLow 0.173827 0.775504 0.224 0.82309
## EconomicMiddle -0.280241 0.376149 -0.745 0.45797
## SqrtCrime -0.077467 0.117370 -0.660 0.51072
## SqrtGDP 0.016589 0.003432 4.834 4.75e-06 ***
## Water -0.014034 0.011280 -1.244 0.21629
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.099 on 102 degrees of freedom
## Multiple R-squared: 0.5603, Adjusted R-squared: 0.5258
## F-statistic: 16.25 on 8 and 102 DF, p-value: 2.896e-15
# Main-effects model plus the Economic-by-UER interaction, which lets the
# unemployment slope differ between economic classes.
MLRM2 <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + (Economic * UER)
)

# Standard lm diagnostic plots.
plot(MLRM2)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Coefficient table and overall fit statistics.
summary(MLRM2)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic +
## SqrtCrime + SqrtGDP + Water + (Economic * UER))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.92117 -0.60129 -0.03686 0.61004 2.65203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.609845 9.920862 0.061 0.951
## UER 4.552339 4.238757 1.074 0.285
## LogLife -1.396628 2.270946 -0.615 0.540
## LogLit 1.822601 1.140275 1.598 0.113
## EconomicLow 1.240840 0.937825 1.323 0.189
## EconomicMiddle -0.432453 0.537261 -0.805 0.423
## SqrtCrime -0.088169 0.114202 -0.772 0.442
## SqrtGDP 0.017599 0.003505 5.021 2.25e-06 ***
## Water -0.013126 0.011058 -1.187 0.238
## UER:EconomicLow -4.310665 4.470476 -0.964 0.337
## UER:EconomicMiddle 1.562959 4.410133 0.354 0.724
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.068 on 100 degrees of freedom
## Multiple R-squared: 0.5931, Adjusted R-squared: 0.5524
## F-statistic: 14.57 on 10 and 100 DF, p-value: 1.239e-15
# Baseline for the nested-model F-tests. It is the same main-effects fit as
# MLRM (identical formula and data), so reuse that object under the name the
# later comparisons expect.
Original <- MLRM

# Partial F-test: does adding the Economic-by-UER interaction improve the fit?
anova(Original, MLRM2)
## Analysis of Variance Table
##
## Model 1: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime +
## SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime +
## SqrtGDP + Water + (Economic * UER)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 102 123.27
## 2 100 114.08 2 9.1879 4.027 0.02079 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Main-effects model plus an Economic-by-Crime interaction.
# NOTE(review): the interaction is written with the raw Crime column while the
# main effect uses SqrtCrime, so this also adds a raw Crime term (3 extra
# parameters in the comparison below) -- confirm that is intended.
MLRM3 <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + (Economic * Crime)
)

# Partial F-test of the added Crime terms against the main-effects baseline.
anova(Original, MLRM3)
## Analysis of Variance Table
##
## Model 1: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime +
## SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime +
## SqrtGDP + Water + (Economic * Crime)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 102 123.27
## 2 99 120.17 3 3.1015 0.8517 0.4689
# Squared versions of the raw numeric predictors, used to test for curvature.
# Economic is categorical and has no meaningful square: the former
# `Economic^2` only raised "'^' not meaningful for factors" and produced NA
# (and it is an outright error when Economic is read as character, the
# read.csv default since R 4.0). Its result was never used, so it is dropped.
qUER <- UER^2
qLife <- Life^2
qLit <- Literacy^2
qCrime <- Crime^2
qGDP <- GDP^2
qWater <- Water^2

# Main-effects model augmented with every quadratic term.
quadMLRM <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + qUER + qLife + qLit + qCrime + qGDP + qWater
)
summary(quadMLRM)
##
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic +
## SqrtCrime + SqrtGDP + Water + qUER + qLife + qLit + qCrime +
## qGDP + qWater)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.95057 -0.61376 0.02908 0.65232 2.64391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.816e+01 5.291e+01 -1.099 0.274336
## UER 8.026e+00 2.248e+00 3.571 0.000558 ***
## LogLife 1.197e+01 1.384e+01 0.865 0.389198
## LogLit -7.683e+00 5.325e+00 -1.443 0.152311
## EconomicLow 6.061e-01 8.321e-01 0.728 0.468174
## EconomicMiddle -2.229e-01 4.105e-01 -0.543 0.588303
## SqrtCrime -8.636e-03 3.112e-01 -0.028 0.977921
## SqrtGDP 2.123e-02 6.012e-03 3.530 0.000639 ***
## Water 8.257e-01 1.573e+00 0.525 0.600740
## qUER -5.917e+00 2.896e+00 -2.043 0.043776 *
## qLife -1.335e-03 1.348e-03 -0.990 0.324542
## qLit 6.742e+00 4.021e+00 1.677 0.096839 .
## qCrime -1.885e-05 2.458e-04 -0.077 0.939012
## qGDP -6.228e-11 1.049e-10 -0.594 0.554145
## qWater -8.289e-03 1.557e-02 -0.532 0.595713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.075 on 96 degrees of freedom
## Multiple R-squared: 0.6041, Adjusted R-squared: 0.5463
## F-statistic: 10.46 on 14 and 96 DF, p-value: 6.618e-14
library(MASS)

# Largest candidate model: all main effects, all quadratic terms and the
# Economic-by-UER interaction.
totalmod <- lm(
  LogPercent ~ UER + LogLife + Economic + LogLit + SqrtCrime + SqrtGDP +
    Water + qUER + qLit + qCrime + qLife + qGDP + qWater + (Economic * UER)
)

# Backward elimination by AIC, starting from the full model. The step trace
# and the finally selected model are printed.
stepAIC(totalmod, direction = "backward")
## Start: AIC=28.27
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtCrime +
## SqrtGDP + Water + qUER + qLit + qCrime + qLife + qGDP + qWater +
## (Economic * UER)
##
## Df Sum of Sq RSS AIC
## - SqrtCrime 1 0.0481 105.47 26.323
## - qCrime 1 0.1187 105.54 26.397
## - Water 1 0.2940 105.71 26.582
## - qWater 1 0.3036 105.72 26.592
## - LogLife 1 0.6924 106.11 26.999
## - qLife 1 1.0200 106.44 27.341
## - qUER 1 1.3500 106.77 27.685
## - qGDP 1 1.4987 106.92 27.839
## <none> 105.42 28.272
## - UER:Economic 2 5.5757 110.99 29.993
## - LogLit 1 4.3086 109.73 30.719
## - qLit 1 5.3934 110.81 31.811
## - SqrtGDP 1 17.5645 122.98 43.379
##
## Step: AIC=26.32
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + Water +
## qUER + qLit + qCrime + qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qCrime 1 0.1364 105.60 24.467
## - Water 1 0.3081 105.77 24.647
## - qWater 1 0.3178 105.78 24.657
## - LogLife 1 0.7226 106.19 25.081
## - qLife 1 1.0594 106.53 25.433
## - qUER 1 1.3471 106.81 25.732
## - qGDP 1 1.6991 107.17 26.097
## <none> 105.47 26.323
## - UER:Economic 2 5.5285 110.99 27.994
## - LogLit 1 4.3047 109.77 28.764
## - qLit 1 5.3702 110.84 29.836
## - SqrtGDP 1 17.9515 123.42 41.771
##
## Step: AIC=24.47
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + Water +
## qUER + qLit + qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - Water 1 0.4699 106.07 22.959
## - qWater 1 0.4823 106.08 22.972
## - LogLife 1 0.7260 106.33 23.227
## - qLife 1 1.0535 106.66 23.568
## - qUER 1 1.3506 106.95 23.877
## - qGDP 1 1.6814 107.28 24.220
## <none> 105.60 24.467
## - UER:Economic 2 5.4712 111.07 26.073
## - LogLit 1 4.8636 110.47 27.465
## - qLit 1 6.0785 111.68 28.679
## - SqrtGDP 1 18.2426 123.84 40.154
##
## Step: AIC=22.96
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + qUER +
## qLit + qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - LogLife 1 1.0123 107.08 22.014
## - qLife 1 1.2966 107.37 22.308
## - qUER 1 1.3503 107.42 22.364
## - qWater 1 1.4645 107.54 22.482
## <none> 106.07 22.959
## - qGDP 1 2.0332 108.11 23.067
## - UER:Economic 2 5.4487 111.52 24.520
## - LogLit 1 4.7375 110.81 25.810
## - qLit 1 6.0985 112.17 27.165
## - SqrtGDP 1 20.2399 126.31 40.344
##
## Step: AIC=22.01
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qUER + qLit +
## qLife + qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qLife 1 0.7520 107.84 20.791
## - qUER 1 1.4213 108.51 21.477
## - qWater 1 1.7401 108.83 21.803
## - qGDP 1 1.9149 109.00 21.981
## <none> 107.08 22.014
## - UER:Economic 2 5.6133 112.70 23.685
## - LogLit 1 4.0033 111.09 24.088
## - qLit 1 5.4928 112.58 25.566
## - SqrtGDP 1 19.3415 126.43 38.444
##
## Step: AIC=20.79
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qUER + qLit +
## qGDP + qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qUER 1 1.2644 109.10 20.084
## - qGDP 1 1.5764 109.41 20.401
## <none> 107.84 20.791
## - qWater 1 1.9921 109.83 20.822
## - UER:Economic 2 5.2484 113.08 22.066
## - LogLit 1 4.2953 112.13 23.126
## - qLit 1 5.5549 113.39 24.366
## - SqrtGDP 1 18.8634 126.70 36.684
##
## Step: AIC=20.08
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + qGDP +
## qWater + UER:Economic
##
## Df Sum of Sq RSS AIC
## - qGDP 1 1.0556 110.16 19.153
## - qWater 1 1.9590 111.06 20.060
## <none> 109.10 20.084
## - LogLit 1 3.1984 112.30 21.292
## - qLit 1 4.4191 113.52 22.492
## - UER:Economic 2 8.9558 118.06 24.841
## - SqrtGDP 1 17.5997 126.70 34.685
##
## Step: AIC=19.15
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + qWater +
## UER:Economic
##
## Df Sum of Sq RSS AIC
## - qWater 1 1.7792 111.94 18.932
## <none> 110.16 19.153
## - LogLit 1 3.2357 113.39 20.367
## - qLit 1 4.8975 115.05 21.982
## - UER:Economic 2 8.6813 118.84 23.573
## - SqrtGDP 1 31.0532 141.21 44.719
##
## Step: AIC=18.93
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + UER:Economic
##
## Df Sum of Sq RSS AIC
## <none> 111.94 18.932
## - LogLit 1 3.286 115.22 20.143
## - qLit 1 4.927 116.86 21.714
## - UER:Economic 2 8.783 120.72 23.317
## - SqrtGDP 1 32.550 144.49 45.265
##
## Call:
## lm(formula = LogPercent ~ UER + Economic + LogLit + SqrtGDP +
## qLit + UER:Economic)
##
## Coefficients:
## (Intercept) UER EconomicLow
## -13.80696 4.87037 1.33476
## EconomicMiddle LogLit SqrtGDP
## -0.25542 -8.26776 0.01764
## qLit UER:EconomicLow UER:EconomicMiddle
## 7.80352 -4.07237 1.60495
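# Prediction from the stepAIC-selected model for Literacy = 0.772, UER = 0.003, GDP = 4000, Economic = Middle:
# log(Percent) = -13.80696 + 4.87037(0.003) - 8.26776 ln(0.772) - 0.25542 + 0.01764 sqrt(4000) + 7.80352(0.772^2) + 1.60495(0.003)
# 10. VIF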
library(faraway)

# Final model: unemployment, sqrt(GDP), log life expectancy, squared literacy,
# economic class and the UER-by-Economic interaction.
# NOTE(review): the stepAIC result printed earlier kept LogLit (with qLit),
# whereas this formula uses LogLife -- confirm the swap is deliberate.
finalmod <- lm(
  LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + (UER * Economic)
)

# Standard lm diagnostic plots.
plot(finalmod)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Variance inflation factors, one per model-matrix column.
vif(finalmod)
## UER SqrtGDP LogLife
## 21.521507 4.503508 2.783962
## qLit EconomicLow EconomicMiddle
## 2.101997 3.594519 6.741804
## UER:EconomicLow UER:EconomicMiddle
## 16.162785 14.575499
# Coefficient estimates, standard errors, t-tests and overall fit statistics
# for the final model.
summary(finalmod)
##
## Call:
## lm(formula = LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic +
## (UER * Economic))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90498 -0.62212 -0.05181 0.60017 2.64431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.591266 9.189367 -0.064 0.9488
## UER 4.880953 4.157863 1.174 0.2432
## SqrtGDP 0.018234 0.003387 5.383 4.7e-07 ***
## LogLife -1.696754 2.204654 -0.770 0.4433
## qLit 1.831247 0.860085 2.129 0.0356 *
## EconomicLow 1.455965 0.919504 1.583 0.1164
## EconomicMiddle -0.365105 0.524940 -0.696 0.4883
## UER:EconomicLow -4.774532 4.390180 -1.088 0.2794
## UER:EconomicMiddle 1.219070 4.340690 0.281 0.7794
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.06 on 102 degrees of freedom
## Multiple R-squared: 0.5913, Adjusted R-squared: 0.5593
## F-statistic: 18.45 on 8 and 102 DF, p-value: < 2.2e-16
# Prediction inputs: Literacy = 0.772, UER = 0.003, GDP = 4000, Economic = Middle
# Distribution of the final model's residuals.
hist(
  residuals(finalmod),
  xlab = "Residuals of the Final Model",
  main = "Histogram of Residuals"
)

# 95% confidence intervals for every coefficient of the final model.
confint(finalmod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -18.81833155 17.63580032
## UER -3.36614904 13.12805601
## SqrtGDP 0.01151552 0.02495275
## LogLife -6.06967554 2.67616736
## qLit 0.12527227 3.53722205
## EconomicLow -0.36786748 3.27979750
## EconomicMiddle -1.40632066 0.67611035
## UER:EconomicLow -13.48243252 3.93336886
## UER:EconomicMiddle -7.39066909 9.82880878