# Read the who.csv data set from GitHub into a data frame.
who_url <- "https://raw.githubusercontent.com/JennierJ/CUNY_DATA_605/master/Assignment_12/who.csv"
who <- read.csv(url(who_url))

# Preview the first six rows.
head(who, n = 6)
##               Country LifeExp InfantSurvival Under5Survival  TBFree
## 1         Afghanistan      42          0.835          0.743 0.99769
## 2             Albania      71          0.985          0.983 0.99974
## 3             Algeria      71          0.967          0.962 0.99944
## 4             Andorra      82          0.997          0.996 0.99983
## 5              Angola      41          0.846          0.740 0.99656
## 6 Antigua and Barbuda      73          0.990          0.989 0.99991
##        PropMD      PropRN PersExp GovtExp TotExp
## 1 0.000228841 0.000572294      20      92    112
## 2 0.001143127 0.004614439     169    3128   3297
## 3 0.001060478 0.002091362     108    5184   5292
## 4 0.003297297 0.003500000    2589  169725 172314
## 5 0.000070400 0.001146162      36    1620   1656
## 6 0.000142857 0.002773810     503   12543  13046

# Per-column summary statistics for the whole data frame.
summary(who)
##                 Country       LifeExp      InfantSurvival  
##  Afghanistan        :  1   Min.   :40.00   Min.   :0.8350  
##  Albania            :  1   1st Qu.:61.25   1st Qu.:0.9433  
##  Algeria            :  1   Median :70.00   Median :0.9785  
##  Andorra            :  1   Mean   :67.38   Mean   :0.9624  
##  Angola             :  1   3rd Qu.:75.00   3rd Qu.:0.9910  
##  Antigua and Barbuda:  1   Max.   :83.00   Max.   :0.9980  
##  (Other)            :184                                   
##  Under5Survival       TBFree           PropMD              PropRN         
##  Min.   :0.7310   Min.   :0.9870   Min.   :0.0000196   Min.   :0.0000883  
##  1st Qu.:0.9253   1st Qu.:0.9969   1st Qu.:0.0002444   1st Qu.:0.0008455  
##  Median :0.9745   Median :0.9992   Median :0.0010474   Median :0.0027584  
##  Mean   :0.9459   Mean   :0.9980   Mean   :0.0017954   Mean   :0.0041336  
##  3rd Qu.:0.9900   3rd Qu.:0.9998   3rd Qu.:0.0024584   3rd Qu.:0.0057164  
##  Max.   :0.9970   Max.   :1.0000   Max.   :0.0351290   Max.   :0.0708387  
##                                                                           
##     PersExp           GovtExp             TotExp      
##  Min.   :   3.00   Min.   :    10.0   Min.   :    13  
##  1st Qu.:  36.25   1st Qu.:   559.5   1st Qu.:   584  
##  Median : 199.50   Median :  5385.0   Median :  5541  
##  Mean   : 742.00   Mean   : 40953.5   Mean   : 41696  
##  3rd Qu.: 515.25   3rd Qu.: 25680.2   3rd Qu.: 26331  
##  Max.   :6350.00   Max.   :476420.0   Max.   :482750  
## 
# Scatterplot of LifeExp against TotExp with the least-squares line overlaid.
# NOTE: the formula references who$LifeExp / who$TotExp directly, so
# predict(lm, newdata = ...) will not pick up new TotExp values from `newdata`;
# refit with `LifeExp ~ TotExp, data = who` if out-of-sample prediction is
# needed from this model.
lm <- lm(who$LifeExp ~ who$TotExp)

with(
  who,
  plot(
    x = TotExp,
    y = LifeExp,
    xlab = "Sum of personal and government expenditures",
    ylab = " Average life expectancy for the country in years"
  )
)
abline(reg = lm)

# Fitted coefficients, then the full regression summary.
print(lm)
## 
## Call:
## lm(formula = who$LifeExp ~ who$TotExp)
## 
## Coefficients:
## (Intercept)   who$TotExp  
##   6.475e+01    6.297e-05
summary(lm)
## 
## Call:
## lm(formula = who$LifeExp ~ who$TotExp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.764  -4.778   3.154   7.116  13.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.475e+01  7.535e-01  85.933  < 2e-16 ***
## who$TotExp  6.297e-05  7.795e-06   8.079 7.71e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared:  0.2577, Adjusted R-squared:  0.2537 
## F-statistic: 65.26 on 1 and 188 DF,  p-value: 7.714e-14
# The linear fit is poor, as can be seen from the plot, and R-squared is low at 0.2577.
# Transform both variables (LifeExp^4.6 and TotExp^0.06), then refit and
# replot the simple regression on the transformed scale.
LifeExp4.6 <- who[["LifeExp"]]^4.6
TotExp0.06 <- who[["TotExp"]]^0.06

lm_exp <- lm(LifeExp4.6 ~ TotExp0.06)

plot(
  x = TotExp0.06,
  y = LifeExp4.6,
  xlab = "Sum of personal and government expenditures^0.06",
  ylab = " Average life expectancy for the country in years^4.6"
)
abline(reg = lm_exp)

# Fitted coefficients, then the full regression summary.
print(lm_exp)
## 
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06)
## 
## Coefficients:
## (Intercept)   TotExp0.06  
##  -736527909    620060216
summary(lm_exp)
## 
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -308616089  -53978977   13697187   59139231  211951764 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -736527910   46817945  -15.73   <2e-16 ***
## TotExp0.06   620060216   27518940   22.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared:  0.7298, Adjusted R-squared:  0.7283 
## F-statistic: 507.7 on 1 and 188 DF,  p-value: < 2.2e-16
# The regression on the transformed variables fits better, with a higher R-squared (0.7298 vs. 0.2577).
# The fitted equation is LifeExp^4.6 = -736527909 + 620060216 * TotExp^0.06
# Question 3
# Forecast life expectancy when TotExp^0.06 equals 1.5 and 2.5.
#
# The forecast must come from the transformed model `lm_exp`: its predictor is
# the variable `TotExp0.06`, so the `newdata` column has to carry exactly that
# name for predict() to use the new values. (Calling predict() on the
# untransformed model, which was fit on who$TotExp, ignores `newdata` and
# returns the 190 in-sample fitted values with a warning.)
forecast <- data.frame(TotExp0.06 = c(1.5, 2.5))

# Predictions are on the LifeExp^4.6 scale ...
forecast_lifeexp4.6 <- predict(lm_exp, newdata = forecast)

# ... so undo the response transformation to express them in years.
forecast_lifeexp4.6^(1 / 4.6)
# Using the coefficients reported by summary(lm_exp):
#   (-736527910 + 620060216 * 1.5)^(1 / 4.6) is approximately 63.31 years
#   (-736527910 + 620060216 * 2.5)^(1 / 4.6) is approximately 86.51 years
# Question 4
# Multiple regression of LifeExp on PropMD, TotExp and their interaction.
lm4 <- lm(
  formula = LifeExp ~ PropMD + TotExp + TotExp:PropMD,
  data = who
)

# Fitted coefficients, then the full regression summary.
print(lm4)
## 
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + TotExp:PropMD, data = who)
## 
## Coefficients:
##   (Intercept)         PropMD         TotExp  PropMD:TotExp  
##     6.277e+01      1.497e+03      7.233e-05     -6.026e-03
summary(lm4)
## 
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + TotExp:PropMD, data = who)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.320  -4.132   2.098   6.540  13.074 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.277e+01  7.956e-01  78.899  < 2e-16 ***
## PropMD         1.497e+03  2.788e+02   5.371 2.32e-07 ***
## TotExp         7.233e-05  8.982e-06   8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03  1.472e-03  -4.093 6.35e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared:  0.3574, Adjusted R-squared:  0.3471 
## F-statistic: 34.49 on 3 and 186 DF,  p-value: < 2.2e-16
# Question 4 (comparison model)
# Additive model without the PropMD:TotExp interaction. It is stored under its
# own name so that the interaction model `lm4` fitted earlier is not
# overwritten, and the duplicated `TotExp` term has been removed from the
# formula (a repeated term does not add a coefficient, so the fit is the same).
lm4_additive <- lm(LifeExp ~ PropMD + TotExp, data = who)
lm4_additive
## 
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp, data = who)
## 
## Coefficients:
## (Intercept)       PropMD       TotExp  
##   6.397e+01    6.508e+02    5.378e-05
summary(lm4_additive)
## 
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp, data = who)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.996  -4.880   3.042   6.958  13.415 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.397e+01  7.706e-01  83.012  < 2e-16 ***
## PropMD      6.508e+02  1.946e+02   3.344 0.000998 ***
## TotExp      5.378e-05  8.074e-06   6.661 2.95e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.127 on 187 degrees of freedom
## Multiple R-squared:  0.2996, Adjusted R-squared:  0.2921 
## F-statistic: 39.99 on 2 and 187 DF,  p-value: 3.479e-15
# Question 5
# Forecast LifeExp when PropMD = 0.03 and TotExp = 14.
newforecast <- data.frame(PropMD = 0.03, TotExp = 14)

# Forecast from the Question 4 interaction model.
predict(lm4, newforecast)
# Using the rounded coefficients reported for lm4:
#   62.77 + 1497 * 0.03 + 7.233e-05 * 14 - 6.026e-03 * 0.03 * 14
#   is about 107.7 years.

# Forecast from the additive model, for comparison.
predict(lm4_additive, newforecast)
##        1 
## 83.49418