library(tinytex)
## Warning: package 'tinytex' was built under R version 4.3.3

\[Import \ the \ data\]

# Read the WHO country-level indicators from disk and preview the first rows.
data <- read.csv(file = "C:/Users/Chafiaa/Downloads/who.csv")
head(x = data)
##               Country LifeExp InfantSurvival Under5Survival  TBFree      PropMD
## 1         Afghanistan      42          0.835          0.743 0.99769 0.000228841
## 2             Albania      71          0.985          0.983 0.99974 0.001143127
## 3             Algeria      71          0.967          0.962 0.99944 0.001060478
## 4             Andorra      82          0.997          0.996 0.99983 0.003297297
## 5              Angola      41          0.846          0.740 0.99656 0.000070400
## 6 Antigua and Barbuda      73          0.990          0.989 0.99991 0.000142857
##        PropRN PersExp GovtExp TotExp
## 1 0.000572294      20      92    112
## 2 0.004614439     169    3128   3297
## 3 0.002091362     108    5184   5292
## 4 0.003500000    2589  169725 172314
## 5 0.001146162      36    1620   1656
## 6 0.002773810     503   12543  13046

\[Answer1\]

# Simple linear regression of life expectancy on total health expenditure,
# followed by the standard coefficient / fit summary.
model <- lm(formula = LifeExp ~ TotExp, data = data)
summary(object = model)
## 
## Call:
## lm(formula = LifeExp ~ TotExp, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.764  -4.778   3.154   7.116  13.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.475e+01  7.535e-01  85.933  < 2e-16 ***
## TotExp      6.297e-05  7.795e-06   8.079 7.71e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.371 on 188 degrees of freedom
## Multiple R-squared:  0.2577, Adjusted R-squared:  0.2537 
## F-statistic: 65.26 on 1 and 188 DF,  p-value: 7.714e-14

## F = 65.26, p-value = 7.714e-14, intercept Std. Error = 7.535e-01, residual standard error = 9.371, R^2 = 0.2577

# Scatter of the raw data with the line taken from `model` drawn on top.
plot(LifeExp ~ TotExp, data = data)
abline(reg = model)

# Residuals against the predictor, with a dotted reference line at zero.
plot(model$residuals ~ data$TotExp)
abline(h = 0, lty = "dotted")

# Distribution of the residuals.
hist(x = model$residuals)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = model$residuals)
qqline(y = model$residuals)

## The assumptions of linear regression were not met: the R^2 value is only about 26%, far from 100%, and the graphs show too many outliers, so the values of F, P, and R^2 cannot be relied on.

\[Answer2\]

# Transform both variables: LifeExp^4.6 is the response and TotExp^0.06 is
# the predictor.
data$LifeExpXform <- data$LifeExp^4.6
data$TotExpXform <- data$TotExp^0.06

# Fit the transformed response on the TRANSFORMED predictor. The earlier
# version regressed on raw TotExp, so TotExpXform was never used and the
# abline() drawn later over the TotExpXform axis did not match the fit.
# NOTE: the summary recorded below this chunk came from the raw-TotExp fit;
# re-knit the document to refresh it.
model <- lm(LifeExpXform ~ TotExpXform, data = data)
summary(model)
## 
## Call:
## lm(formula = LifeExpXform ~ TotExp, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -309107631 -103496133   18566535  100019031  273607812 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.542e+08  1.064e+07   23.89   <2e-16 ***
## TotExp      1.290e+03  1.101e+02   11.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 132300000 on 188 degrees of freedom
## Multiple R-squared:  0.4222, Adjusted R-squared:  0.4191 
## F-statistic: 137.4 on 1 and 188 DF,  p-value: < 2.2e-16

## F = 137.4, R-squared = 0.4222, p-value < 2.2e-16, intercept Std. Error = 1.064e+07, residual standard error = 132300000

# Scatter of the transformed variables with the line taken from `model`.
plot(LifeExpXform ~ TotExpXform, data = data)
abline(reg = model)

# Residuals against the transformed predictor, dotted zero reference line.
plot(model$residuals ~ data$TotExpXform)
abline(h = 0, lty = "dotted")

# Distribution of the residuals.
hist(x = model$residuals)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = model$residuals)
qqline(y = model$residuals)

## This model is better than model one, but I can still see outliers. The residual distribution is close to normal, so our assumptions are met with this model, and F = 137.4 indicates a significant relationship.

\[Answer3\]

# Forecast life expectancy when the transformed predictor TotExp^0.06 = 1.5.
# The earlier version hard-coded coefficients from a fit on raw TotExp and
# multiplied the raw-scale slope by 1.5, mixing the two scales. Here the
# regression on the transformed predictor is fitted locally so the forecast
# does not depend on what `model` currently holds.
# NOTE: the value recorded below (67.17579) came from the old calculation;
# re-knit the document to refresh it.
xform_fit <- lm(LifeExpXform ~ TotExpXform, data = data)
pred_lifeExp <- predict(xform_fit, newdata = data.frame(TotExpXform = 1.5))
# Undo the ^4.6 response transform to get back to years.
(pred_lifeExp <- unname(pred_lifeExp)^(1/4.6))
## [1] 67.17579

\[Answer4\] \[LifeExp = b_0 + b_1 \times PropMD + b_2 \times TotExp + b_3 \times PropMD \times TotExp\]

# Multiple regression with both main effects and their interaction
# (`PropMD * TotExp` expands to PropMD + TotExp + PropMD:TotExp).
model <- lm(formula = LifeExp ~ PropMD * TotExp, data = data)
summary(object = model)
## 
## Call:
## lm(formula = LifeExp ~ PropMD * TotExp, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.320  -4.132   2.098   6.540  13.074 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.277e+01  7.956e-01  78.899  < 2e-16 ***
## PropMD         1.497e+03  2.788e+02   5.371 2.32e-07 ***
## TotExp         7.233e-05  8.982e-06   8.053 9.39e-14 ***
## PropMD:TotExp -6.026e-03  1.472e-03  -4.093 6.35e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.765 on 186 degrees of freedom
## Multiple R-squared:  0.3574, Adjusted R-squared:  0.3471 
## F-statistic: 34.49 on 3 and 186 DF,  p-value: < 2.2e-16

## F = 34.49, p-value < 2.2e-16, R-squared = 0.3574 (bad model), residual standard error = 8.765

# Response plotted against the predictors in the formula.
plot(LifeExp ~ PropMD * TotExp, data)

# abline(model) is not meaningful here: the model has four coefficients and
# abline() would use only the first two. Compare observed with fitted values
# instead; points on the 45-degree line are predicted exactly.
plot(data$LifeExp ~ model$fitted.values,
     xlab = "Fitted LifeExp", ylab = "Observed LifeExp")
abline(a = 0, b = 1)

# Residuals against fitted values in a single panel, so the dotted zero
# reference line applies to the plot being inspected.
plot(model$residuals ~ model$fitted.values,
     xlab = "Fitted LifeExp", ylab = "Residuals")
abline(h = 0, lty = 3)

# Distribution of the residuals.
hist(model$residuals)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(model$residuals)
qqline(model$residuals)

# The assumptions are not met and the model is poor: R^2 is only about 35%.

\[Answer5\]

# Forecast LifeExp at PropMD = 0.03 and TotExp = 14 using the rounded
# coefficients printed in the interaction-model summary. The inputs are named
# inside local() so no extra variables are left in the workspace.
(pred_lifeExp <- local({
  prop_md <- 0.03
  tot_exp <- 14
  6.277e+01 + 1.497e+03 * prop_md + 7.233e-05 * tot_exp + -6.026e-03 * prop_md * tot_exp
}))
## [1] 107.6785
## Almost 108 years: it is unusual for people to live this long, so this prediction would be treated as an outlier.