# Load the WHO country-level dataset and inspect the type of each column.
data <- read.csv(
  file = "who.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
str(object = data)
## 'data.frame': 190 obs. of 10 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ LifeExp : int 42 71 71 82 41 73 75 69 82 80 ...
## $ InfantSurvival: num 0.835 0.985 0.967 0.997 0.846 0.99 0.986 0.979 0.995 0.996 ...
## $ Under5Survival: num 0.743 0.983 0.962 0.996 0.74 0.989 0.983 0.976 0.994 0.996 ...
## $ TBFree : num 0.998 1 0.999 1 0.997 ...
## $ PropMD : num 2.29e-04 1.14e-03 1.06e-03 3.30e-03 7.04e-05 ...
## $ PropRN : num 0.000572 0.004614 0.002091 0.0035 0.001146 ...
## $ PersExp : int 20 169 108 2589 36 503 484 88 3181 3788 ...
## $ GovtExp : int 92 3128 5184 169725 1620 12543 19170 1856 187616 189354 ...
## $ TotExp : int 112 3297 5292 172314 1656 13046 19654 1944 190797 193142 ...
# Preview the first six rows of the dataset.
head(data, n = 6L)
## Country LifeExp InfantSurvival Under5Survival TBFree
## 1 Afghanistan 42 0.835 0.743 0.99769
## 2 Albania 71 0.985 0.983 0.99974
## 3 Algeria 71 0.967 0.962 0.99944
## 4 Andorra 82 0.997 0.996 0.99983
## 5 Angola 41 0.846 0.740 0.99656
## 6 Antigua and Barbuda 73 0.990 0.989 0.99991
## PropMD PropRN PersExp GovtExp TotExp
## 1 0.000228841 0.000572294 20 92 112
## 2 0.001143127 0.004614439 169 3128 3297
## 3 0.001060478 0.002091362 108 5184 5292
## 4 0.003297297 0.003500000 2589 169725 172314
## 5 0.000070400 0.001146162 36 1620 1656
## 6 0.000142857 0.002773810 503 12543 13046
# Simple linear regression of life expectancy on total health expenditure.
# Columns are supplied through `data =` rather than `data$...` inside the
# formula, so the model terms carry plain column names and the fit can be used
# with predict(newdata = ...). The estimates are unchanged; only the term
# labels in the printed output differ.
(linear_model <- lm(LifeExp ~ TotExp, data = data))
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = data)
##
## Coefficients:
## (Intercept) TotExp
## 6.475e+01 6.297e-05
# Full summary: coefficient tests, residual standard error, R-squared and the
# overall F-statistic.
summary(linear_model)
##
## Call:
## lm(formula = LifeExp ~ TotExp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.764 -4.778 3.154 7.116 13.292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e+01 7.535e-01 85.933 < 2e-16 ***
## TotExp 6.297e-05 7.795e-06 8.079 7.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared: 0.2577, Adjusted R-squared: 0.2537
## F-statistic: 65.26 on 1 and 188 DF, p-value: 7.714e-14
We use the F-test to compare the fits of different linear models.
To check the significance of the above F-statistic, we compare it with the critical value of the F distribution with 1 and 188 degrees of freedom =>
# Critical value for the overall F-test at the 5% significance level: the
# upper-tail quantile of the F distribution with 1 and 188 degrees of freedom.
# qf() returns lower-tail quantiles by default, so the 95th percentile is
# requested. The regression is significant when the observed F-statistic
# (65.26) exceeds this value, which is approximately 3.89.
qf(0.95, df1 = 1, df2 = 188)
# Residuals of the first model against total expenditure, to check for
# leftover structure the straight-line fit did not capture.
plot(linear_model$residuals ~ TotExp, data = data)
# Power-transform both variables: life expectancy is raised to 4.6 and total
# expenditure to 0.06, then stored as new columns of `data`.
data$LifeExp4.6 <- data$LifeExp^4.6
data$TotExp0.06 <- data$TotExp^0.06
# Scatter plot of the transformed variables. The axis labels state the power
# transformation that was applied (x^0.06 and y^4.6), not a multiplication by
# exp().
plot(
  data$TotExp0.06, data$LifeExp4.6,
  xlab = "Total Expenditures ^ 0.06",
  ylab = "Life Expectancy ^ 4.6"
)
# Refit the simple regression on the transformed scale and print the fit.
(linear_m <- lm(LifeExp4.6 ~ TotExp0.06, data = data))
##
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06, data = data)
##
## Coefficients:
## (Intercept) TotExp0.06
## -736527909 620060216
# Coefficient tests and goodness-of-fit statistics for the transformed model.
summary(object = linear_m)
##
## Call:
## lm(formula = LifeExp4.6 ~ TotExp0.06, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -308616089 -53978977 13697187 59139231 211951764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -736527910 46817945 -15.73 <2e-16 ***
## TotExp0.06 620060216 27518940 22.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90490000 on 188 degrees of freedom
## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7283
## F-statistic: 507.7 on 1 and 188 DF, p-value: < 2.2e-16
\(LifeExp^{4.6} = -736527910 + 620060216 \times TotExp^{0.06}\)
# Forecast life expectancy from the transformed simple regression.
#
# `totexp` is the already-transformed predictor (TotExp^0.06), not raw
# expenditure. The fitted line gives LifeExp^4.6, so the result is raised to
# 1/4.6 to return to the original life-expectancy scale. Vectorized over
# `totexp`.
tr <- function(totexp) {
  life_exp_pow <- 620060216 * totexp - 736527910
  life_exp_pow^(1 / 4.6)
}
At TotExp^0.06 = 1.5 =>
# Forecast life expectancy where the transformed predictor equals 1.5.
tr(totexp = 1.5)
## [1] 63.31153
At TotExp^0.06 = 2.5 =>
# Forecast life expectancy where the transformed predictor equals 2.5.
tr(totexp = 2.5)
## [1] 86.50645
\(LifeExp^{4.6} = b_0 + b_1 \times PropMD + b_2 \times TotExp^{0.06} + b_3 \times PropMD \times TotExp^{0.06}\)
# Multiple regression on the transformed scale with an interaction term.
# `PropMD * TotExp0.06` expands to both main effects plus their interaction,
# i.e. the same model as PropMD + TotExp0.06 + PropMD:TotExp0.06. Columns are
# supplied through `data =` rather than `data$...` inside the formula, so the
# coefficients carry plain column names and the fit works with
# predict(newdata = ...). The estimates are unchanged; only the term labels in
# the printed output differ.
(multi_lm <- lm(LifeExp4.6 ~ PropMD * TotExp0.06, data = data))
##
## Call:
## lm(formula = LifeExp4.6 ~ PropMD * TotExp0.06, data = data)
##
## Coefficients:
## (Intercept) PropMD TotExp0.06 PropMD:TotExp0.06
## -7.244e+08 4.727e+10 6.048e+08 -2.121e+10
# Coefficient tests and goodness-of-fit statistics for the interaction model.
summary(multi_lm)
##
## Call:
## lm(formula = LifeExp4.6 ~ PropMD * TotExp0.06, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -296470018 -47729263 12183210 60285515 212311883
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.244e+08 5.083e+07 -14.253 <2e-16 ***
## PropMD 4.727e+10 2.258e+10 2.094 0.0376 *
## TotExp0.06 6.048e+08 3.023e+07 20.005 <2e-16 ***
## PropMD:TotExp0.06 -2.121e+10 1.131e+10 -1.876 0.0622 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 88520000 on 186 degrees of freedom
## Multiple R-squared: 0.7441, Adjusted R-squared: 0.74
## F-statistic: 180.3 on 3 and 186 DF, p-value: < 2.2e-16
# Coefficients printed without scientific-notation rounding, for use in the
# forecast equation.
multi_lm$coefficients
## (Intercept) PropMD TotExp0.06 PropMD:TotExp0.06
## -724418697 47273338389 604795792 -21214671638
Using the fitted coefficients:
\(LifeExp^{4.6} = -724418697 + (47273338389 \times PropMD) + (604795792 \times TotExp^{0.06}) - (21214671638 \times PropMD \times TotExp^{0.06})\)
# Forecast life expectancy from the interaction model.
#
# `propmd` is the proportion of MDs; `totexp` is the already-transformed
# expenditure (TotExp^0.06). The linear predictor gives LifeExp^4.6, so the
# result is raised to 1/4.6 to return to the original life-expectancy scale.
tr_multi <- function(propmd, totexp) {
  intercept <- -724418697
  b_propmd <- 47273338389
  b_totexp <- 604795792
  b_interaction <- 21214671638

  life_exp_pow <- intercept + (b_propmd * propmd) + (b_totexp * totexp) -
    (b_interaction * propmd * totexp)
  life_exp_pow^(1 / 4.6)
}
# Forecast life expectancy for PropMD = 0.03 and TotExp = 14; the expenditure
# is transformed to the 0.06 power before being passed in.
tr_multi(propmd = 0.03, totexp = 14^0.06)
## [1] 82.56958
# Scatter plot of life expectancy against the proportion of MDs.
with(
  data,
  plot(
    PropMD, LifeExp,
    xlab = "Proportion of MDs",
    ylab = "Life Expectancy"
  )
)