# Read the who.csv dataset straight from GitHub into a data frame.
who_url <- "https://raw.githubusercontent.com/Kingtilon1/DATA607/main/who.csv"
country <- read.csv(who_url)

# Print a compact column-by-column overview (name, type, first values).
glimpse(country)
## Rows: 190
## Columns: 10
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola…
## $ LifeExp <int> 42, 71, 71, 82, 41, 73, 75, 69, 82, 80, 64, 74, 75, 63,…
## $ InfantSurvival <dbl> 0.835, 0.985, 0.967, 0.997, 0.846, 0.990, 0.986, 0.979,…
## $ Under5Survival <dbl> 0.743, 0.983, 0.962, 0.996, 0.740, 0.989, 0.983, 0.976,…
## $ TBFree <dbl> 0.99769, 0.99974, 0.99944, 0.99983, 0.99656, 0.99991, 0…
## $ PropMD <dbl> 0.000228841, 0.001143127, 0.001060478, 0.003297297, 0.0…
## $ PropRN <dbl> 0.000572294, 0.004614439, 0.002091362, 0.003500000, 0.0…
## $ PersExp <int> 20, 169, 108, 2589, 36, 503, 484, 88, 3181, 3788, 62, 1…
## $ GovtExp <int> 92, 3128, 5184, 169725, 1620, 12543, 19170, 1856, 18761…
## $ TotExp <int> 112, 3297, 5292, 172314, 1656, 13046, 19654, 1944, 1907…
# Scatterplot of the raw relationship: total expenditures on the x-axis,
# life expectancy on the y-axis, one point per row of `country`.
scatterplot <- ggplot(country, aes(x = TotExp, y = LifeExp)) +
  geom_point() +
  labs(
    x = "Total Expenditures (US dollars)",
    y = "Life Expectancy (years)",
    title = "Scatterplot of Life Expectancy vs. Total Expenditures"
  )
print(scatterplot)
# Simple linear regression of life expectancy on total expenditures,
# both on their original (untransformed) scales.
model <- lm(
  formula = LifeExp ~ TotExp,
  data = country
)

# Coefficient table, residual standard error, R-squared and F-statistic.
summary(model)
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = country)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.764 -4.778 3.154 7.116 13.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e+01 7.535e-01 85.933 < 2e-16 ***
## TotExp 6.297e-05 7.795e-06 8.079 7.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared: 0.2577, Adjusted R-squared: 0.2537
## F-statistic: 65.26 on 1 and 188 DF, p-value: 7.714e-14
# Add power-transformed copies of the response and the predictor:
# LifeExp raised to 4.6 and TotExp raised to 0.06. The original columns
# are kept alongside the new ones.
country_transformed <- mutate(
  country,
  LifeExp_transformed = LifeExp^4.6,
  TotExp_0.06 = TotExp^0.06
)
# Same scatterplot as before, but drawn on the transformed scales
# (TotExp^0.06 on the x-axis, LifeExp^4.6 on the y-axis).
scatterplot_transformed <- ggplot(
  country_transformed,
  aes(x = TotExp_0.06, y = LifeExp_transformed)
) +
  geom_point() +
  labs(
    x = "Total Expenditures Transformed",
    y = "Life Expectancy Transformed",
    title = "Scatterplot of Transformed Life Expectancy vs. Total Expenditures"
  )
print(scatterplot_transformed)
# Refit the simple regression using the transformed response (LifeExp^4.6)
# and the transformed predictor (TotExp^0.06).
model_transformed <- lm(
  formula = LifeExp_transformed ~ TotExp_0.06,
  data = country_transformed
)

# Fit statistics here are on the LifeExp^4.6 scale, not in years.
summary(model_transformed)
##
## Call:
## lm(formula = LifeExp_transformed ~ TotExp_0.06, data = country_transformed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## TotExp_0.06 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
The F-statistic of 507.7, with a very low p-value (< 2.2e-16), shows that the transformed model is statistically significant. With an R-squared of 0.7298, about 73% of the variability in transformed life expectancy is explained by transformed total expenditures, compared with about 26% (R-squared of 0.2577) for the original model; this is a significant improvement in explanatory power and indicates that the transformed model provides a better fit to the data. The residual standard error of 90490000 is numerically much larger than the original model's 9.371, but it is measured in units of LifeExp^4.6 rather than years, so the two values are not directly comparable and the larger number does not by itself imply less accurate predictions.
# Forecast life expectancy from the transformed model at two predictor
# values. The intercept and slope are the estimates printed by
# summary(model_transformed).
transformed_intercept <- -736527910
transformed_slope <- 620060216

# Predictor values on the transformed scale (they stand in for TotExp^0.06,
# not for raw expenditures).
totexp_1 <- 1.5
totexp_2 <- 2.5

# Fitted response on the LifeExp^4.6 scale.
lifeexp_transformed_1 <- transformed_intercept + transformed_slope * totexp_1
lifeexp_transformed_2 <- transformed_intercept + transformed_slope * totexp_2

# Undo the 4.6 power so the forecasts are expressed in years.
back_exponent <- 1 / 4.6
lifeexp_1 <- lifeexp_transformed_1^back_exponent
lifeexp_2 <- lifeexp_transformed_2^back_exponent

lifeexp_1
## [1] 63.31153
lifeexp_2
## [1] 86.50645
# Multiple regression on the untransformed data: main effects of PropMD and
# TotExp plus their interaction (reported as PropMD:TotExp in the summary).
model_multiple <- lm(
  formula = LifeExp ~ PropMD + TotExp + PropMD * TotExp,
  data = country
)

# Coefficient table and overall fit statistics for the interaction model.
summary(model_multiple)
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + PropMD * TotExp, data = country)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.320 -4.132 2.098 6.540 13.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.277e+01 7.956e-01 78.899 < 2e-16 ***
## PropMD 1.497e+03 2.788e+02 5.371 2.32e-07 ***
## TotExp 7.233e-05 8.982e-06 8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03 1.472e-03 -4.093 6.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared: 0.3574, Adjusted R-squared: 0.3471
## F-statistic: 34.49 on 3 and 186 DF, p-value: < 2.2e-16
In comparison to the previous model (LifeExp_transformed ~ TotExp_0.06), which had a high F-statistic of 507.7 and an R-squared of 0.7298, the current model is also statistically significant, with an F-statistic of 34.49 and a very low p-value (< 2.2e-16), and every coefficient, including the PropMD:TotExp interaction, is significant. However, its R-squared is only 0.3574, indicating that approximately 36% of the variability in life expectancy can be explained by PropMD, TotExp, and their interaction, versus approximately 73% of the variability in transformed life expectancy explained by TotExp_0.06 in the previous model. The current model therefore improves on the original untransformed model (R-squared of 0.2577) but is less effective than the transformed model at capturing the relationship between healthcare expenditures and life expectancy, a relationship that remains important in healthcare research and policy formulation.
# Rounded estimates copied from the summary(model_multiple) coefficient table.
b0 <- 6.277e+01   # (Intercept)
b1 <- 1.497e+03   # PropMD
b2 <- 7.233e-05   # TotExp
b3 <- -6.026e-03  # PropMD:TotExp

# Scenario to forecast.
PropMD <- 0.03
TotExp <- 14

# Build the fitted value term by term, then add the terms in the same
# left-to-right order as the regression equation.
md_term <- b1 * PropMD
exp_term <- b2 * TotExp
interaction_term <- b3 * PropMD * TotExp
LifeExp <- b0 + md_term + exp_term + interaction_term
LifeExp
## [1] 107.6785
This value is much higher than any life expectancy observed in the data, so I do not think it is realistic: the maximum life expectancy in the data is 83 years.