# Load the WHO country-level data set (a CSV hosted on GitHub) and preview
# the first rows.
who <- read.csv(
  file = url(
    "https://raw.githubusercontent.com/JennierJ/CUNY_DATA_605/master/Assignment_12/who.csv"
  )
)
head(who)
## Country LifeExp InfantSurvival Under5Survival TBFree
## 1 Afghanistan 42 0.835 0.743 0.99769
## 2 Albania 71 0.985 0.983 0.99974
## 3 Algeria 71 0.967 0.962 0.99944
## 4 Andorra 82 0.997 0.996 0.99983
## 5 Angola 41 0.846 0.740 0.99656
## 6 Antigua and Barbuda 73 0.990 0.989 0.99991
## PropMD PropRN PersExp GovtExp TotExp
## 1 0.000228841 0.000572294 20 92 112
## 2 0.001143127 0.004614439 169 3128 3297
## 3 0.001060478 0.002091362 108 5184 5292
## 4 0.003297297 0.003500000 2589 169725 172314
## 5 0.000070400 0.001146162 36 1620 1656
## 6 0.000142857 0.002773810 503 12543 13046
# Per-column summary of the `who` data frame.
summary(who)
## Country LifeExp InfantSurvival
## Afghanistan : 1 Min. :40.00 Min. :0.8350
## Albania : 1 1st Qu.:61.25 1st Qu.:0.9433
## Algeria : 1 Median :70.00 Median :0.9785
## Andorra : 1 Mean :67.38 Mean :0.9624
## Angola : 1 3rd Qu.:75.00 3rd Qu.:0.9910
## Antigua and Barbuda: 1 Max. :83.00 Max. :0.9980
## (Other) :184
## Under5Survival TBFree PropMD PropRN
## Min. :0.7310 Min. :0.9870 Min. :0.0000196 Min. :0.0000883
## 1st Qu.:0.9253 1st Qu.:0.9969 1st Qu.:0.0002444 1st Qu.:0.0008455
## Median :0.9745 Median :0.9992 Median :0.0010474 Median :0.0027584
## Mean :0.9459 Mean :0.9980 Mean :0.0017954 Mean :0.0041336
## 3rd Qu.:0.9900 3rd Qu.:0.9998 3rd Qu.:0.0024584 3rd Qu.:0.0057164
## Max. :0.9970 Max. :1.0000 Max. :0.0351290 Max. :0.0708387
##
## PersExp GovtExp TotExp
## Min. : 3.00 Min. : 10.0 Min. : 13
## 1st Qu.: 36.25 1st Qu.: 559.5 1st Qu.: 584
## Median : 199.50 Median : 5385.0 Median : 5541
## Mean : 742.00 Mean : 40953.5 Mean : 41696
## 3rd Qu.: 515.25 3rd Qu.: 25680.2 3rd Qu.: 26331
## Max. :6350.00 Max. :476420.0 Max. :482750
##
# Question 1: scatterplot of LifeExp against TotExp with the least-squares
# line drawn on top.
with(
  who,
  plot(
    x = TotExp,
    y = LifeExp,
    xlab = "Sum of personal and government expenditures",
    ylab = " Average life expectancy for the country in years"
  )
)

# Simple linear regression of life expectancy on total expenditure.
# The fitted object is bound to the name `lm`; calls such as lm(...) keep
# working because R skips non-function bindings when resolving a call.
lm <- lm(who$LifeExp ~ who$TotExp)
abline(reg = lm)

# Print the fitted coefficients.
lm
##
## Call:
## lm(formula = who$LifeExp ~ who$TotExp)
##
## Coefficients:
## (Intercept) who$TotExp
## 6.475e+01 6.297e-05
# Full regression report for the untransformed fit: residual quantiles,
# coefficient table, R-squared and F-statistic.
summary(lm)
##
## Call:
## lm(formula = who$LifeExp ~ who$TotExp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.764 -4.778 3.154 7.116 13.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e+01 7.535e-01 85.933 < 2e-16 ***
## who$TotExp 6.297e-05 7.795e-06 8.079 7.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared: 0.2577, Adjusted R-squared: 0.2537
## F-statistic: 65.26 on 1 and 188 DF, p-value: 7.714e-14
# The linear fit is poor, as the scatterplot shows, and R-squared is low at 0.2577.
# Question 2: refit after power transforms -- LifeExp is raised to the
# power 4.6 and TotExp to the power 0.06.
TotExp0.06 <- who[["TotExp"]]^0.06
LifeExp4.6 <- who[["LifeExp"]]^4.6

# Scatterplot of the transformed variables.
plot(
  x = TotExp0.06,
  y = LifeExp4.6,
  xlab = "Sum of personal and government expenditures^0.06",
  ylab = " Average life expectancy for the country in years^4.6"
)

# Fit on the transformed scale and overlay the regression line.
lm_exp <- lm(LifeExp4.6 ~ TotExp0.06)
abline(reg = lm_exp)

# Print the fitted coefficients.
lm_exp
##
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06)
##
## Coefficients:
## (Intercept) TotExp0.06
## -736527909 620060216
# Full regression report for the transformed fit: residual quantiles,
# coefficient table, R-squared and F-statistic.
summary(lm_exp)
##
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## TotExp0.06 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
# The transformed model fits much better: R-squared rises from 0.2577 to 0.7298.
# The fitted equation is LifeExp^4.6 = -736527909 + 620060216 * TotExp^0.06
# Question 3
# Forecast life expectancy when TotExp^0.06 equals 1.5 and 2.5.
#
# The values 1.5 and 2.5 are read as points on the transformed TotExp^0.06
# scale: raw TotExp ranges from 13 to 482750 in this data set, so they cannot
# be raw expenditures. The forecast therefore uses the transformed model
# `lm_exp`, not the untransformed model `lm`.
#
# `lm` was also unusable for this purpose: its formula refers to `who$TotExp`
# directly, so predict() ignored `newdata` and returned the 190 fitted values
# with a "'newdata' had 2 rows but variables found have 190 rows" warning.
#
# The column in `newdata` must carry the predictor's name exactly as it
# appears in the model formula (TotExp0.06).
forecast <- data.frame(TotExp0.06 = c(1.5, 2.5))

# Predictions come back on the LifeExp^4.6 scale ...
life_exp_pow <- predict(lm_exp, newdata = forecast)

# ... so undo the power transform to express them in years.
life_exp_pow^(1 / 4.6)
# Plugging the printed coefficients into
#   (-736527909 + 620060216 * x)^(1 / 4.6)
# gives roughly 63.3 years at x = 1.5 and 86.5 years at x = 2.5.
# Question 4
# Regress LifeExp on PropMD, TotExp and their interaction.
lm4 <- lm(
  LifeExp ~ PropMD + TotExp + TotExp:PropMD,
  data = who
)

# Print the fitted coefficients.
lm4
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + TotExp:PropMD, data = who)
##
## Coefficients:
## (Intercept) PropMD TotExp PropMD:TotExp
## 6.277e+01 1.497e+03 7.233e-05 -6.026e-03
# Full regression report for the interaction model: residual quantiles,
# coefficient table, R-squared and F-statistic.
summary(lm4)
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + TotExp:PropMD, data = who)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.320 -4.132 2.098 6.540 13.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.277e+01 7.956e-01 78.899 < 2e-16 ***
## PropMD 1.497e+03 2.788e+02 5.371 2.32e-07 ***
## TotExp 7.233e-05 8.982e-06 8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03 1.472e-03 -4.093 6.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared: 0.3574, Adjusted R-squared: 0.3471
## F-statistic: 34.49 on 3 and 186 DF, p-value: < 2.2e-16
# Question 4 (additive comparison model)
# Same predictors as the interaction model, but without the PropMD:TotExp
# term. It is stored under its own name so that `lm4` keeps referring to the
# interaction model fitted earlier, which Question 5 forecasts from.
# The formula previously listed TotExp twice; R drops the duplicate term, so
# the fit and the numbers below are unchanged.
lm4_additive <- lm(LifeExp ~ PropMD + TotExp, data = who)
lm4_additive
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp, data = who)
##
## Coefficients:
## (Intercept) PropMD TotExp
## 6.397e+01 6.508e+02 5.378e-05
summary(lm4_additive)
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp, data = who)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.996 -4.880 3.042 6.958 13.415
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.397e+01 7.706e-01 83.012 < 2e-16 ***
## PropMD 6.508e+02 1.946e+02 3.344 0.000998 ***
## TotExp 5.378e-05 8.074e-06 6.661 2.95e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.127 on 187 degrees of freedom
## Multiple R-squared: 0.2996, Adjusted R-squared: 0.2921
## F-statistic: 39.99 on 2 and 187 DF, p-value: 3.479e-15
# Question 5
# Forecast LifeExp at PropMD = 0.03 and TotExp = 14 from the interaction
# model `lm4` (LifeExp ~ PropMD + TotExp + TotExp:PropMD).
newforecast <- data.frame(PropMD = 0.03, TotExp = 14)
predict(lm4, newforecast)
# From the printed interaction-model coefficients:
#   62.77 + 1497 * 0.03 + 7.233e-05 * 14 - 6.026e-03 * 0.03 * 14
# which is roughly 107.7 years. That is well above the observed maximum of
# 83: PropMD = 0.03 is close to the largest value in the data (0.0351) while
# TotExp = 14 is close to the smallest (13), so this is an extrapolation and
# should not be read as a realistic life expectancy.