library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the WHO country-level CSV straight from its GitHub raw URL into a tibble.
WHO_Data <- read_csv(
  file = "https://raw.githubusercontent.com/johnnyboy1287/WHO_Data/main/who.csv"
)
## Warning in gzfile(file, mode): cannot open compressed file 'C:/Users/NCC-1701D/
## AppData/Local/Temp/RtmpyKqxbO\file2d68583452c1', probable reason 'No such file
## or directory'
## Rows: 190 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Country
## dbl (9): LifeExp, InfantSurvival, Under5Survival, TBFree, PropMD, PropRN, Pe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact column-by-column preview (name, type, first values) of the loaded data.
dplyr::glimpse(WHO_Data)
## Rows: 190
## Columns: 10
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola…
## $ LifeExp <dbl> 42, 71, 71, 82, 41, 73, 75, 69, 82, 80, 64, 74, 75, 63,…
## $ InfantSurvival <dbl> 0.835, 0.985, 0.967, 0.997, 0.846, 0.990, 0.986, 0.979,…
## $ Under5Survival <dbl> 0.743, 0.983, 0.962, 0.996, 0.740, 0.989, 0.983, 0.976,…
## $ TBFree <dbl> 0.99769, 0.99974, 0.99944, 0.99983, 0.99656, 0.99991, 0…
## $ PropMD <dbl> 0.000228841, 0.001143127, 0.001060478, 0.003297297, 0.0…
## $ PropRN <dbl> 0.000572294, 0.004614439, 0.002091362, 0.003500000, 0.0…
## $ PersExp <dbl> 20, 169, 108, 2589, 36, 503, 484, 88, 3181, 3788, 62, 1…
## $ GovtExp <dbl> 92, 3128, 5184, 169725, 1620, 12543, 19170, 1856, 18761…
## $ TotExp <dbl> 112, 3297, 5292, 172314, 1656, 13046, 19654, 1944, 1907…
# Scatterplot of LifeExp against TotExp on the raw (untransformed) scale,
# then a simple linear regression of LifeExp on TotExp and its summary table.
plot(LifeExp ~ TotExp, data = WHO_Data)
lm1 <- lm(LifeExp ~ TotExp, data = WHO_Data)
summary(lm1)
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = WHO_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.764 -4.778 3.154 7.116 13.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e+01 7.535e-01 85.933 < 2e-16 ***
## TotExp 6.297e-05 7.795e-06 8.079 7.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared: 0.2577, Adjusted R-squared: 0.2537
## F-statistic: 65.26 on 1 and 188 DF, p-value: 7.714e-14
Since the p-value is less than .05, we can consider the relationship between TotExp and LifeExp to be statistically significant. However, although the F-statistic is 65.26, the R-squared is only .26 (adjusted .25), meaning that roughly 26% of the variance in life expectancy is explained by the model, so this is not a very good model of our data.
# Add power-transformed versions of the response and the predictor:
#   LifeExp_new = LifeExp^4.6
#   TotExp_new  = TotExp^0.06
# Both columns are appended in one mutate() call, then the widened data is
# previewed to confirm the two new columns.
WHO_Data <- WHO_Data %>%
  mutate(
    LifeExp_new = LifeExp^4.6,
    TotExp_new = TotExp^0.06
  )
glimpse(WHO_Data)
## Rows: 190
## Columns: 12
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola…
## $ LifeExp <dbl> 42, 71, 71, 82, 41, 73, 75, 69, 82, 80, 64, 74, 75, 63,…
## $ InfantSurvival <dbl> 0.835, 0.985, 0.967, 0.997, 0.846, 0.990, 0.986, 0.979,…
## $ Under5Survival <dbl> 0.743, 0.983, 0.962, 0.996, 0.740, 0.989, 0.983, 0.976,…
## $ TBFree <dbl> 0.99769, 0.99974, 0.99944, 0.99983, 0.99656, 0.99991, 0…
## $ PropMD <dbl> 0.000228841, 0.001143127, 0.001060478, 0.003297297, 0.0…
## $ PropRN <dbl> 0.000572294, 0.004614439, 0.002091362, 0.003500000, 0.0…
## $ PersExp <dbl> 20, 169, 108, 2589, 36, 503, 484, 88, 3181, 3788, 62, 1…
## $ GovtExp <dbl> 92, 3128, 5184, 169725, 1620, 12543, 19170, 1856, 18761…
## $ TotExp <dbl> 112, 3297, 5292, 172314, 1656, 13046, 19654, 1944, 1907…
## $ LifeExp_new <dbl> 29305338, 327935478, 327935478, 636126841, 26230450, 37…
## $ TotExp_new <dbl> 1.327251, 1.625875, 1.672697, 2.061481, 1.560068, 1.765…
# Scatterplot and simple linear regression on the transformed scale:
# LifeExp_new (LifeExp^4.6) regressed on TotExp_new (TotExp^0.06).
# Note that lm2 therefore predicts LifeExp^4.6, not LifeExp itself.
plot(LifeExp_new ~ TotExp_new, data = WHO_Data)
lm2 <- lm(LifeExp_new ~ TotExp_new, data = WHO_Data)
summary(lm2)
##
## Call:
## lm(formula = LifeExp_new ~ TotExp_new, data = WHO_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## TotExp_new 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
The R-squared value is higher here than in our previous model (0.73 versus 0.26), and our F-statistic is also higher (507.7 versus 65.26). Our model here is better than our original one.
# Forecast life expectancy from lm2 when TotExp^0.06 equals 1.5.
# lm2 predicts LifeExp^4.6, so the prediction is mapped back to years by
# taking the 4.6th root (raising to the power 1/4.6). log(x, 4.6) would be a
# base-4.6 logarithm, which does not invert the power transform.
pred1 <- predict(lm2, data.frame(TotExp_new = 1.5))
pred1^(1 / 4.6)
## 1
## 63.31153
# Same forecast when TotExp^0.06 equals 2.5.
pred2 <- predict(lm2, data.frame(TotExp_new = 2.5))
pred2^(1 / 4.6)
## 1
## 86.50645
# Multiple regression of LifeExp on PropMD, TotExp and their interaction,
# fitted on the untransformed columns.
# In an R formula `PropMD * TotExp` already expands to both main effects plus
# the PropMD:TotExp interaction, so the explicit main-effect terms are
# redundant but harmless; they are kept so the printed Call matches.
lm3 <- lm(LifeExp ~ PropMD + TotExp + PropMD * TotExp, data = WHO_Data)
summary(lm3)
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + PropMD * TotExp, data = WHO_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.320 -4.132 2.098 6.540 13.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.277e+01 7.956e-01 78.899 < 2e-16 ***
## PropMD 1.497e+03 2.788e+02 5.371 2.32e-07 ***
## TotExp 7.233e-05 8.982e-06 8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03 1.472e-03 -4.093 6.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared: 0.3574, Adjusted R-squared: 0.3471
## F-statistic: 34.49 on 3 and 186 DF, p-value: < 2.2e-16
# Predicted LifeExp from lm3 for a single new observation with
# PropMD = 0.03 and TotExp = 14.
predict(
  lm3,
  newdata = data.frame(PropMD = 0.03, TotExp = 14)
)
## 1
## 107.696
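I would say this forecast does not seem realistic, as a life expectancy of about 108 years is not plausible at present. Note that PropMD = 0.03 means that 3% of the population are doctors (not a 3% increase in doctors), which is roughly ten times the largest values visible in the data preview above, while TotExp = 14 is lower than any value shown there, so this prediction is an extrapolation well outside the observed data.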