library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read the WHO indicators (one row per country) from the GitHub-hosted CSV.
WHO_Data <- read_csv(
  file = "https://raw.githubusercontent.com/johnnyboy1287/WHO_Data/main/who.csv"
)
## Warning in gzfile(file, mode): cannot open compressed file 'C:/Users/NCC-1701D/
## AppData/Local/Temp/RtmpyKqxbO\file2d68583452c1', probable reason 'No such file
## or directory'
## Rows: 190 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Country
## dbl (9): LifeExp, InfantSurvival, Under5Survival, TBFree, PropMD, PropRN, Pe...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick structural check of the imported columns and their types.
WHO_Data %>% glimpse()
## Rows: 190
## Columns: 10
## $ Country        <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola…
## $ LifeExp        <dbl> 42, 71, 71, 82, 41, 73, 75, 69, 82, 80, 64, 74, 75, 63,…
## $ InfantSurvival <dbl> 0.835, 0.985, 0.967, 0.997, 0.846, 0.990, 0.986, 0.979,…
## $ Under5Survival <dbl> 0.743, 0.983, 0.962, 0.996, 0.740, 0.989, 0.983, 0.976,…
## $ TBFree         <dbl> 0.99769, 0.99974, 0.99944, 0.99983, 0.99656, 0.99991, 0…
## $ PropMD         <dbl> 0.000228841, 0.001143127, 0.001060478, 0.003297297, 0.0…
## $ PropRN         <dbl> 0.000572294, 0.004614439, 0.002091362, 0.003500000, 0.0…
## $ PersExp        <dbl> 20, 169, 108, 2589, 36, 503, 484, 88, 3181, 3788, 62, 1…
## $ GovtExp        <dbl> 92, 3128, 5184, 169725, 1620, 12543, 19170, 1856, 18761…
## $ TotExp         <dbl> 112, 3297, 5292, 172314, 1656, 13046, 19654, 1944, 1907…

Exercise 1

# Scatterplot of the raw relationship between LifeExp and TotExp.
plot(LifeExp ~ TotExp, data = WHO_Data)

# Simple linear regression of LifeExp on the untransformed TotExp.
lm1 <- lm(LifeExp ~ TotExp, data = WHO_Data)
summary(lm1)
## 
## Call:
## lm(formula = LifeExp ~ TotExp, data = WHO_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.764  -4.778   3.154   7.116  13.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.475e+01  7.535e-01  85.933  < 2e-16 ***
## TotExp      6.297e-05  7.795e-06   8.079 7.71e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared:  0.2577, Adjusted R-squared:  0.2537 
## F-statistic: 65.26 on 1 and 188 DF,  p-value: 7.714e-14

Since the p-value (7.71e-14) is less than .05, we can consider the relationship to be statistically significant. However, although the F-statistic is 65.26, the R-squared is only about 0.26, meaning that roughly 26% of the variability in life expectancy is explained by the model, so this is not the best model of our data.

Exercise 2

# Add the power-transformed variables used by the second model:
# LifeExp raised to the 4.6 power and TotExp raised to the 0.06 power.
WHO_Data[["LifeExp_new"]] <- WHO_Data[["LifeExp"]]^4.6
WHO_Data[["TotExp_new"]] <- WHO_Data[["TotExp"]]^0.06

# Confirm that the two new columns were appended.
glimpse(WHO_Data)
## Rows: 190
## Columns: 12
## $ Country        <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola…
## $ LifeExp        <dbl> 42, 71, 71, 82, 41, 73, 75, 69, 82, 80, 64, 74, 75, 63,…
## $ InfantSurvival <dbl> 0.835, 0.985, 0.967, 0.997, 0.846, 0.990, 0.986, 0.979,…
## $ Under5Survival <dbl> 0.743, 0.983, 0.962, 0.996, 0.740, 0.989, 0.983, 0.976,…
## $ TBFree         <dbl> 0.99769, 0.99974, 0.99944, 0.99983, 0.99656, 0.99991, 0…
## $ PropMD         <dbl> 0.000228841, 0.001143127, 0.001060478, 0.003297297, 0.0…
## $ PropRN         <dbl> 0.000572294, 0.004614439, 0.002091362, 0.003500000, 0.0…
## $ PersExp        <dbl> 20, 169, 108, 2589, 36, 503, 484, 88, 3181, 3788, 62, 1…
## $ GovtExp        <dbl> 92, 3128, 5184, 169725, 1620, 12543, 19170, 1856, 18761…
## $ TotExp         <dbl> 112, 3297, 5292, 172314, 1656, 13046, 19654, 1944, 1907…
## $ LifeExp_new    <dbl> 29305338, 327935478, 327935478, 636126841, 26230450, 37…
## $ TotExp_new     <dbl> 1.327251, 1.625875, 1.672697, 2.061481, 1.560068, 1.765…
# Scatterplot of the transformed variables.
plot(LifeExp_new ~ TotExp_new, data = WHO_Data)

# Linear regression on the transformed scale (LifeExp^4.6 on TotExp^0.06).
lm2 <- lm(LifeExp_new ~ TotExp_new, data = WHO_Data)
summary(lm2)
## 
## Call:
## lm(formula = LifeExp_new ~ TotExp_new, data = WHO_Data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -308616089  -53978977   13697187   59139231  211951764 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -736527910   46817945  -15.73   <2e-16 ***
## TotExp_new   620060216   27518940   22.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared:  0.7298, Adjusted R-squared:  0.7283 
## F-statistic: 507.7 on 1 and 188 DF,  p-value: < 2.2e-16

The R-squared value is higher here than in our previous model (0.7298 vs. 0.2577), and our F-statistic is also higher (507.7 vs. 65.26). Our model here is better than our original one.

Exercise 3

# lm2 predicts LifeExp^4.6, so a forecast is returned to the original scale
# (years) by raising it to the 1/4.6 power. A base-4.6 logarithm is NOT the
# inverse of raising to the 4.6 power and gives meaningless values.

# Forecast life expectancy when TotExp^0.06 = 1.5.
pred1 <- predict(lm2, newdata = data.frame(TotExp_new = 1.5))
pred1^(1 / 4.6)
##        1 
## 63.31153

# Forecast life expectancy when TotExp^0.06 = 2.5.
pred2 <- predict(lm2, newdata = data.frame(TotExp_new = 2.5))
pred2^(1 / 4.6)
##        1 
## 86.50645

Exercise 4

# Multiple regression with both main effects and their interaction.
# (`PropMD * TotExp` alone already expands to these same three terms; the
# main effects are spelled out to mirror the model as stated.)
lm3 <- lm(LifeExp ~ PropMD + TotExp + PropMD * TotExp, data = WHO_Data)
summary(lm3)
## 
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + PropMD * TotExp, data = WHO_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.320  -4.132   2.098   6.540  13.074 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.277e+01  7.956e-01  78.899  < 2e-16 ***
## PropMD         1.497e+03  2.788e+02   5.371 2.32e-07 ***
## TotExp         7.233e-05  8.982e-06   8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03  1.472e-03  -4.093 6.35e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared:  0.3574, Adjusted R-squared:  0.3471 
## F-statistic: 34.49 on 3 and 186 DF,  p-value: < 2.2e-16

Exercise 5

# Forecast LifeExp from the interaction model at PropMD = 0.03, TotExp = 14.
predict(
  lm3,
  newdata = data.frame(PropMD = 0.03, TotExp = 14)
)
##       1 
## 107.696

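I would say this forecast does not seem realistic, as a life expectancy of about 108 years is not plausible at the current time. Note that PropMD = 0.03 means that 3% of the population are physicians (not a 3% increase in the number of doctors), which is roughly ten times larger than the PropMD values visible in the data above, while TotExp = 14 is well below the TotExp values shown. The model is therefore being extrapolated outside the range it appears to have been fit on, which explains the implausible result.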