library(readr)
# Read the WHO country-level dataset from the working directory.
data <- read_csv(file = "who.csv")
## Rows: 190 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Country
## dbl (9): LifeExp, InfantSurvival, Under5Survival, TBFree, PropMD, PropRN, Pe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of life expectancy against total expenditure (raw scale)
plot(
  x = data$TotExp,
  y = data$LifeExp,
  main = "Life Expectancy vs Total Expenditure",
  xlab = "Total Expenditure",
  ylab = "Life Expectancy"
)
# Baseline simple linear regression on the untransformed variables
model <- lm(formula = LifeExp ~ TotExp, data = data)
summary(model)
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.764 -4.778 3.154 7.116 13.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e+01 7.535e-01 85.933 < 2e-16 ***
## TotExp 6.297e-05 7.795e-06 8.079 7.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared: 0.2577, Adjusted R-squared: 0.2537
## F-statistic: 65.26 on 1 and 188 DF, p-value: 7.714e-14
F Stat: 65.26
F Stat P Val: 7.714e-14
R^2: .2577
# Refit with the response raised to the 4.6 power and the predictor raised to
# the 0.06 power; I() keeps ^ as arithmetic inside the formula.
model2 <- lm(
  formula = I(LifeExp^4.6) ~ I(TotExp^0.06),
  data = data
)
summary(model2)
##
## Call:
## lm(formula = I(LifeExp^4.6) ~ I(TotExp^0.06), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## I(TotExp^0.06) 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
F Stat: 507.7
F Stat P Val: < 2.2e-16
R^2: .7298
# Same transformation as model2, but applied to a copy of the data instead of
# inside the formula: LifeExp^4.6 regressed on TotExp^0.06.
# NOTE(review): the exponents were previously swapped (TotExp^4.6 and
# LifeExp^.06), which produced R^2 ~ .035. With the exponents matched to model2
# this fit is the same regression as model2, so the statistics are identical.
# Any interpretation written from the earlier output should be revisited.
data2 <- data
data2$TotExp <- data2$TotExp^0.06
data2$LifeExp <- data2$LifeExp^4.6
model2.1 <- lm(LifeExp ~ TotExp, data = data2)
summary(model2.1)
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## TotExp 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
F Stat: 507.7
F Stat P Val: < 2.2e-16
R^2: .7298
# Scatter plot of the two columns stored in data2
with(
  data2,
  plot(
    x = TotExp,
    y = LifeExp,
    main = "Life Exp as a fn of Total Exp",
    xlab = "Total Exp",
    ylab = "Life Exp"
  )
)
| The plot does not really improve over the first, unless the outliers
were to be removed so that the plot could home in on the early interactions
near 0.0e+00. The plots are equivalent regardless of which transformation
method is used.
# Transformed scatter plot built directly from the raw columns of `data`:
# LifeExp^4.6 against TotExp^0.06, the same powers model2 uses.
# Both axes come from `data`, so no column is transformed twice.
plot(data$TotExp^0.06, data$LifeExp^4.6,
     xlab = "Total Exp", ylab = "Life Exp",
     main = "Life Exp as a fn of Total Exp")
# Show the transformed-model fit again; the forecasts that follow use its
# intercept and slope.
summary(model2)
##
## Call:
## lm(formula = I(LifeExp^4.6) ~ I(TotExp^0.06), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## I(TotExp^0.06) 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
# Forecast life expectancy from model2 when TotExp^0.06 equals 1.5 and 2.5.
# The intercept and slope are copied from summary(model2).
# model2 predicts LifeExp^4.6, so each fitted value is raised to the power
# 1/4.6 to bring it back to the original LifeExp scale.
intercept_m2 <- -736527910
slope_m2 <- 620060216
life_exp_1 <- (intercept_m2 + slope_m2 * 1.5)^(1 / 4.6)
life_exp_2 <- (intercept_m2 + slope_m2 * 2.5)^(1 / 4.6)
print(life_exp_1)
## [1] 63.31153
print(life_exp_2)
## [1] 86.50645
# Multiple regression on the raw scale: both main effects plus their
# interaction term.
model4 <- lm(
  formula = LifeExp ~ PropMD + TotExp + PropMD:TotExp,
  data = data
)
summary(model4)
##
## Call:
## lm(formula = LifeExp ~ PropMD + TotExp + PropMD:TotExp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.320 -4.132 2.098 6.540 13.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.277e+01 7.956e-01 78.899 < 2e-16 ***
## PropMD 1.497e+03 2.788e+02 5.371 2.32e-07 ***
## TotExp 7.233e-05 8.982e-06 8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03 1.472e-03 -4.093 6.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared: 0.3574, Adjusted R-squared: 0.3471
## F-statistic: 34.49 on 3 and 186 DF, p-value: < 2.2e-16
F Stat: 34.49
F Stat P Val: <2.2e-16
R^2: .3574
or why not?
# Forecast LifeExp from model4 at PropMD = .03 and TotExp = 14.
# model4 contains a PropMD:TotExp interaction, so its coefficient has to be
# applied to the product of the two predictor values; leaving it out gives a
# forecast from a different model than the one that was fitted.
coefficients <- coef(model4)
int <- coefficients["(Intercept)"]
propCo <- coefficients["PropMD"]
totCo <- coefficients["TotExp"]
interCo <- coefficients["PropMD:TotExp"]
PropMDval <- .03
TotExpval <- 14
forecast_1 <- int + propCo*PropMDval + totCo*TotExpval +
  interCo*PropMDval*TotExpval
print(forecast_1)
## (Intercept)
## 107.696
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# List any observed rows whose life expectancy exceeds 100
data %>%
  select(TotExp, PropMD, LifeExp) %>%
  filter(LifeExp > 100)
## # A tibble: 0 × 3
## # ℹ 3 variables: TotExp <dbl>, PropMD <dbl>, LifeExp <dbl>