# Load the migration data set: tab-separated text with a header row, taken
# from the clipboard (copy the spreadsheet range before running this line).
# read.delim() already defaults to header = TRUE and sep = "\t".
# NOTE(review): a clipboard source is not reproducible -- consider reading
# from a saved file instead.
Mig <- read.delim("clipboard")
# Preview the first six rows to confirm the columns parsed as expected.
head(Mig)
##      Country Crime Literacy Economic   GDP   Stock Percent Water Life
## 1    Albania 39.87    0.976   Middle 12500   52484   0.018  0.95 78.5
## 2    Algeria 50.68    0.802   Middle 15100  248624   0.006  0.84 77.0
## 3  Argentina 61.79    0.981   Middle 20700 2164524   0.049  0.99 77.3
## 4    Armenia 28.28    0.997   Middle  9100  190719   0.065  1.00 74.8
## 5 Austrailia 42.55    0.990     High 49900 7035560   0.288  1.00 82.3
## 6    Austria 20.41    0.980     High 49200 1660283   0.190  1.00 81.6
##     UER
## 1 0.140
## 2 0.117
## 3 0.081
## 4 0.189
## 5 0.056
## 6 0.054
# Put the columns of Mig on the search path; the models and plots below refer
# to them by bare name (Percent, Life, UER, ...).
# NOTE(review): attach() is fragile (masking, stale copies). Passing
# `data = Mig` to each call would be safer, but every later statement relies
# on the attached names, so it is kept here.
attach(Mig)
# Transformed versions of the response and predictors used by the models.
# Natural-log transforms:
LogPercent <- log(Percent)
LogLife <- log(Life)
LogLit <- log(Literacy)
# Square-root transforms:
SqrtCrime <- Crime^0.5
SqrtGDP <- GDP^0.5

1. Original Life Plot

# Scatterplot of the untransformed response against life expectancy.
plot(
  Percent ~ Life,
  main = "Life Expectancy vs. Percent Migrant Stock",
  xlab = "Life Expectancy",
  ylab = "Percent Migrant Stock"
)

  1. Log of Life plot to show the changes
# Natural-log transform of both variables (same definitions as at the top of
# the script, repeated so this section can be run on its own).
LogPercent <- log(Percent)
LogLife <- log(Life)
# Simple linear regression on the log-log scale.
mymod4 <- lm(LogPercent ~ LogLife)
# Scatterplot of the transformed variables.
plot(
  LogPercent ~ LogLife,
  main = "Transformed Life Expectancy vs. Percent Migrant Stock",
  xlab = "Log of Life Expectancy",
  ylab = "Log of Percent Migrant Stock"
)

  1. Resids of Log of Life to show normality
# Fit the log-log model and plot its residuals in row order, with a
# horizontal reference line at zero.
# NOTE(review): an index plot shows pattern/spread over row order, not
# normality -- a qqnorm()/qqline() plot or histogram would support the
# "Normality" title better. Confirm the intended diagnostic.
norm2 <- lm(LogPercent ~ LogLife)
plot(
  residuals(norm2),
  main = "Normality of Residuals",
  xlab = "Countries in Alphabetical Order",
  ylab = "Residuals After Transformation"
)
abline(h = 0)

  1. Building the full model
# Full multiple regression: log of percent migrant stock on every candidate
# predictor. Economic is categorical and enters as dummy variables
# (EconomicLow / EconomicMiddle in the summary).
MLRM <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP + Water
)
# Default lm diagnostic plots.
plot(MLRM)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Coefficient table and overall fit statistics.
summary(MLRM)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic + 
##     SqrtCrime + SqrtGDP + Water)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0948 -0.6075  0.0249  0.6891  2.6314 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.483852  10.085319  -0.345  0.73048    
## UER             3.229853   1.077474   2.998  0.00342 ** 
## LogLife        -0.408754   2.296828  -0.178  0.85910    
## LogLit          1.277120   1.156095   1.105  0.27190    
## EconomicLow     0.173827   0.775504   0.224  0.82309    
## EconomicMiddle -0.280241   0.376149  -0.745  0.45797    
## SqrtCrime      -0.077467   0.117370  -0.660  0.51072    
## SqrtGDP         0.016589   0.003432   4.834 4.75e-06 ***
## Water          -0.014034   0.011280  -1.244  0.21629    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.099 on 102 degrees of freedom
## Multiple R-squared:  0.5603, Adjusted R-squared:  0.5258 
## F-statistic: 16.25 on 8 and 102 DF,  p-value: 2.896e-15
  1. Interaction with Econ and UER
# Full model plus the Economic-by-UER interaction, so the UER slope may
# differ between economic groups.
MLRM2 <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + (Economic * UER)
)
# Default lm diagnostic plots.
plot(MLRM2)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Coefficient table and overall fit statistics.
summary(MLRM2)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic + 
##     SqrtCrime + SqrtGDP + Water + (Economic * UER))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.92117 -0.60129 -0.03686  0.61004  2.65203 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.609845   9.920862   0.061    0.951    
## UER                 4.552339   4.238757   1.074    0.285    
## LogLife            -1.396628   2.270946  -0.615    0.540    
## LogLit              1.822601   1.140275   1.598    0.113    
## EconomicLow         1.240840   0.937825   1.323    0.189    
## EconomicMiddle     -0.432453   0.537261  -0.805    0.423    
## SqrtCrime          -0.088169   0.114202  -0.772    0.442    
## SqrtGDP             0.017599   0.003505   5.021 2.25e-06 ***
## Water              -0.013126   0.011058  -1.187    0.238    
## UER:EconomicLow    -4.310665   4.470476  -0.964    0.337    
## UER:EconomicMiddle  1.562959   4.410133   0.354    0.724    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.068 on 100 degrees of freedom
## Multiple R-squared:  0.5931, Adjusted R-squared:  0.5524 
## F-statistic: 14.57 on 10 and 100 DF,  p-value: 1.239e-15
  1. Nested F-test for the Econ and UER interaction
# Baseline for the nested comparisons: the full model without interactions.
Original <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP + Water
)
# Partial F-test of the baseline against the model that adds the
# Economic-by-UER interaction (F is the default test for lm comparisons).
anova(Original, MLRM2, test = "F")
## Analysis of Variance Table
## 
## Model 1: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + 
##     SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + 
##     SqrtGDP + Water + (Economic * UER)
##   Res.Df    RSS Df Sum of Sq     F  Pr(>F)  
## 1    102 123.27                             
## 2    100 114.08  2    9.1879 4.027 0.02079 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Interaction with Econ and Crime
# Full model plus the Economic-by-crime interaction.
# The interaction is built on SqrtCrime, the crime term already in the model.
# Using raw Crime here would also add a Crime main effect alongside SqrtCrime,
# so the nested test would spend 3 df instead of the 2 df that belong to the
# interaction alone.
# NOTE: the ANOVA table recorded below was produced with the raw-Crime
# interaction; re-knit to refresh it.
MLRM3 <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + (Economic * SqrtCrime)
)
# Partial F-test of the baseline model against the interaction model.
anova(Original, MLRM3)
## Analysis of Variance Table
## 
## Model 1: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + 
##     SqrtGDP + Water
## Model 2: LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + 
##     SqrtGDP + Water + (Economic * Crime)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    102 123.27                           
## 2     99 120.17  3    3.1015 0.8517 0.4689
  1. Quad terms
# Squared versions of the numeric predictors, on their original scale.
# Economic is categorical, so it has no squared term: `Economic^2` is not
# meaningful for a factor (all NA with a warning) and is an error when the
# column is read as character.
qUER <- UER^2
qLife <- Life^2
qLit <- Literacy^2
qCrime <- Crime^2
qGDP <- GDP^2
qWater <- Water^2
# Full model augmented with every quadratic term.
quadMLRM <- lm(
  LogPercent ~ UER + LogLife + LogLit + Economic + SqrtCrime + SqrtGDP +
    Water + qUER + qLife + qLit + qCrime + qGDP + qWater
)
# Coefficient table and overall fit statistics.
summary(quadMLRM)
## 
## Call:
## lm(formula = LogPercent ~ UER + LogLife + LogLit + Economic + 
##     SqrtCrime + SqrtGDP + Water + qUER + qLife + qLit + qCrime + 
##     qGDP + qWater)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.95057 -0.61376  0.02908  0.65232  2.64391 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -5.816e+01  5.291e+01  -1.099 0.274336    
## UER             8.026e+00  2.248e+00   3.571 0.000558 ***
## LogLife         1.197e+01  1.384e+01   0.865 0.389198    
## LogLit         -7.683e+00  5.325e+00  -1.443 0.152311    
## EconomicLow     6.061e-01  8.321e-01   0.728 0.468174    
## EconomicMiddle -2.229e-01  4.105e-01  -0.543 0.588303    
## SqrtCrime      -8.636e-03  3.112e-01  -0.028 0.977921    
## SqrtGDP         2.123e-02  6.012e-03   3.530 0.000639 ***
## Water           8.257e-01  1.573e+00   0.525 0.600740    
## qUER           -5.917e+00  2.896e+00  -2.043 0.043776 *  
## qLife          -1.335e-03  1.348e-03  -0.990 0.324542    
## qLit            6.742e+00  4.021e+00   1.677 0.096839 .  
## qCrime         -1.885e-05  2.458e-04  -0.077 0.939012    
## qGDP           -6.228e-11  1.049e-10  -0.594 0.554145    
## qWater         -8.289e-03  1.557e-02  -0.532 0.595713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 96 degrees of freedom
## Multiple R-squared:  0.6041, Adjusted R-squared:  0.5463 
## F-statistic: 10.46 on 14 and 96 DF,  p-value: 6.618e-14
  1. AIC
# MASS provides stepAIC().
library(MASS)
# Largest candidate model: all main effects, all quadratic terms, and the
# Economic-by-UER interaction.
totalmod <- lm(
  LogPercent ~ UER + LogLife + Economic + LogLit + SqrtCrime + SqrtGDP +
    Water + qUER + qLit + qCrime + qLife + qGDP + qWater + (Economic * UER)
)
# Backward elimination by AIC, starting from totalmod. The trace and the
# selected model are printed; the result is not stored.
stepAIC(totalmod, direction = "backward")
## Start:  AIC=28.27
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtCrime + 
##     SqrtGDP + Water + qUER + qLit + qCrime + qLife + qGDP + qWater + 
##     (Economic * UER)
## 
##                Df Sum of Sq    RSS    AIC
## - SqrtCrime     1    0.0481 105.47 26.323
## - qCrime        1    0.1187 105.54 26.397
## - Water         1    0.2940 105.71 26.582
## - qWater        1    0.3036 105.72 26.592
## - LogLife       1    0.6924 106.11 26.999
## - qLife         1    1.0200 106.44 27.341
## - qUER          1    1.3500 106.77 27.685
## - qGDP          1    1.4987 106.92 27.839
## <none>                      105.42 28.272
## - UER:Economic  2    5.5757 110.99 29.993
## - LogLit        1    4.3086 109.73 30.719
## - qLit          1    5.3934 110.81 31.811
## - SqrtGDP       1   17.5645 122.98 43.379
## 
## Step:  AIC=26.32
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + Water + 
##     qUER + qLit + qCrime + qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qCrime        1    0.1364 105.60 24.467
## - Water         1    0.3081 105.77 24.647
## - qWater        1    0.3178 105.78 24.657
## - LogLife       1    0.7226 106.19 25.081
## - qLife         1    1.0594 106.53 25.433
## - qUER          1    1.3471 106.81 25.732
## - qGDP          1    1.6991 107.17 26.097
## <none>                      105.47 26.323
## - UER:Economic  2    5.5285 110.99 27.994
## - LogLit        1    4.3047 109.77 28.764
## - qLit          1    5.3702 110.84 29.836
## - SqrtGDP       1   17.9515 123.42 41.771
## 
## Step:  AIC=24.47
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + Water + 
##     qUER + qLit + qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - Water         1    0.4699 106.07 22.959
## - qWater        1    0.4823 106.08 22.972
## - LogLife       1    0.7260 106.33 23.227
## - qLife         1    1.0535 106.66 23.568
## - qUER          1    1.3506 106.95 23.877
## - qGDP          1    1.6814 107.28 24.220
## <none>                      105.60 24.467
## - UER:Economic  2    5.4712 111.07 26.073
## - LogLit        1    4.8636 110.47 27.465
## - qLit          1    6.0785 111.68 28.679
## - SqrtGDP       1   18.2426 123.84 40.154
## 
## Step:  AIC=22.96
## LogPercent ~ UER + LogLife + Economic + LogLit + SqrtGDP + qUER + 
##     qLit + qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - LogLife       1    1.0123 107.08 22.014
## - qLife         1    1.2966 107.37 22.308
## - qUER          1    1.3503 107.42 22.364
## - qWater        1    1.4645 107.54 22.482
## <none>                      106.07 22.959
## - qGDP          1    2.0332 108.11 23.067
## - UER:Economic  2    5.4487 111.52 24.520
## - LogLit        1    4.7375 110.81 25.810
## - qLit          1    6.0985 112.17 27.165
## - SqrtGDP       1   20.2399 126.31 40.344
## 
## Step:  AIC=22.01
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qUER + qLit + 
##     qLife + qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qLife         1    0.7520 107.84 20.791
## - qUER          1    1.4213 108.51 21.477
## - qWater        1    1.7401 108.83 21.803
## - qGDP          1    1.9149 109.00 21.981
## <none>                      107.08 22.014
## - UER:Economic  2    5.6133 112.70 23.685
## - LogLit        1    4.0033 111.09 24.088
## - qLit          1    5.4928 112.58 25.566
## - SqrtGDP       1   19.3415 126.43 38.444
## 
## Step:  AIC=20.79
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qUER + qLit + 
##     qGDP + qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qUER          1    1.2644 109.10 20.084
## - qGDP          1    1.5764 109.41 20.401
## <none>                      107.84 20.791
## - qWater        1    1.9921 109.83 20.822
## - UER:Economic  2    5.2484 113.08 22.066
## - LogLit        1    4.2953 112.13 23.126
## - qLit          1    5.5549 113.39 24.366
## - SqrtGDP       1   18.8634 126.70 36.684
## 
## Step:  AIC=20.08
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + qGDP + 
##     qWater + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qGDP          1    1.0556 110.16 19.153
## - qWater        1    1.9590 111.06 20.060
## <none>                      109.10 20.084
## - LogLit        1    3.1984 112.30 21.292
## - qLit          1    4.4191 113.52 22.492
## - UER:Economic  2    8.9558 118.06 24.841
## - SqrtGDP       1   17.5997 126.70 34.685
## 
## Step:  AIC=19.15
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + qWater + 
##     UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## - qWater        1    1.7792 111.94 18.932
## <none>                      110.16 19.153
## - LogLit        1    3.2357 113.39 20.367
## - qLit          1    4.8975 115.05 21.982
## - UER:Economic  2    8.6813 118.84 23.573
## - SqrtGDP       1   31.0532 141.21 44.719
## 
## Step:  AIC=18.93
## LogPercent ~ UER + Economic + LogLit + SqrtGDP + qLit + UER:Economic
## 
##                Df Sum of Sq    RSS    AIC
## <none>                      111.94 18.932
## - LogLit        1     3.286 115.22 20.143
## - qLit          1     4.927 116.86 21.714
## - UER:Economic  2     8.783 120.72 23.317
## - SqrtGDP       1    32.550 144.49 45.265
## 
## Call:
## lm(formula = LogPercent ~ UER + Economic + LogLit + SqrtGDP + 
##     qLit + UER:Economic)
## 
## Coefficients:
##        (Intercept)                 UER         EconomicLow  
##          -13.80696             4.87037             1.33476  
##     EconomicMiddle              LogLit             SqrtGDP  
##           -0.25542            -8.26776             0.01764  
##               qLit     UER:EconomicLow  UER:EconomicMiddle  
##            7.80352            -4.07237             1.60495

log(percent) = -13.80696 + 4.87037(.003) - 8.26776 * ln(.772) - .25542 + .01764 * sqrt(4000) + 7.80352(.772^2) + 1.60495(.003), so percent = exp(log(percent))

Prediction inputs: Literacy = .772, UER = .003, GDP = 4000, Economic = Middle

10. VIF

# faraway is loaded here, presumably for vif() used below.
library(faraway)
# Final working model.
# NOTE(review): this formula keeps LogLife and drops LogLit, which differs
# from the model selected by stepAIC above (UER + Economic + LogLit +
# SqrtGDP + qLit + UER:Economic) -- confirm this is intentional.
finalmod <- lm(
  LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + (UER * Economic)
)
# Default lm diagnostic plots.
plot(finalmod)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Variance inflation factors for each model term.
vif(finalmod)
##                UER            SqrtGDP            LogLife 
##          21.521507           4.503508           2.783962 
##               qLit        EconomicLow     EconomicMiddle 
##           2.101997           3.594519           6.741804 
##    UER:EconomicLow UER:EconomicMiddle 
##          16.162785          14.575499
# Coefficient table and overall fit statistics.
summary(finalmod)
## 
## Call:
## lm(formula = LogPercent ~ UER + SqrtGDP + LogLife + qLit + Economic + 
##     (UER * Economic))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.90498 -0.62212 -0.05181  0.60017  2.64431 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.591266   9.189367  -0.064   0.9488    
## UER                 4.880953   4.157863   1.174   0.2432    
## SqrtGDP             0.018234   0.003387   5.383  4.7e-07 ***
## LogLife            -1.696754   2.204654  -0.770   0.4433    
## qLit                1.831247   0.860085   2.129   0.0356 *  
## EconomicLow         1.455965   0.919504   1.583   0.1164    
## EconomicMiddle     -0.365105   0.524940  -0.696   0.4883    
## UER:EconomicLow    -4.774532   4.390180  -1.088   0.2794    
## UER:EconomicMiddle  1.219070   4.340690   0.281   0.7794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.06 on 102 degrees of freedom
## Multiple R-squared:  0.5913, Adjusted R-squared:  0.5593 
## F-statistic: 18.45 on 8 and 102 DF,  p-value: < 2.2e-16

Prediction inputs: Literacy = .772, UER = .003, GDP = 4000, Economic = Middle

  1. Check for normality
# Histogram of the final model's residuals as a visual normality check.
hist(
  residuals(finalmod),
  main = "Histogram of Residuals",
  xlab = "Residuals of the Final Model"
)

  1. Confidence Intervals and Point Prediction
# 95% confidence intervals (the default level) for every coefficient of the
# final model.
confint(finalmod, level = 0.95)
##                           2.5 %      97.5 %
## (Intercept)        -18.81833155 17.63580032
## UER                 -3.36614904 13.12805601
## SqrtGDP              0.01151552  0.02495275
## LogLife             -6.06967554  2.67616736
## qLit                 0.12527227  3.53722205
## EconomicLow         -0.36786748  3.27979750
## EconomicMiddle      -1.40632066  0.67611035
## UER:EconomicLow    -13.48243252  3.93336886
## UER:EconomicMiddle  -7.39066909  9.82880878