# Load the life expectancy dataset from a local CSV file, then print
# per-column summary statistics (including NA counts) for a first look.
Life_Exp <- read.csv(
  file = "/Users/christinakasman/Downloads/Life Expectancy Data.csv"
)
summary(object = Life_Exp)
                Country          Year             Status    
 Afghanistan        :  16   Min.   :2000   Developed : 512  
 Albania            :  16   1st Qu.:2004   Developing:2426  
 Algeria            :  16   Median :2008                    
 Angola             :  16   Mean   :2008                    
 Antigua and Barbuda:  16   3rd Qu.:2012                    
 Argentina          :  16   Max.   :2015                    
 (Other)            :2842                                   
 Life.expectancy Adult.Mortality infant.deaths   
 Min.   :36.30   Min.   :  1.0   Min.   :   0.0  
 1st Qu.:63.10   1st Qu.: 74.0   1st Qu.:   0.0  
 Median :72.10   Median :144.0   Median :   3.0  
 Mean   :69.22   Mean   :164.8   Mean   :  30.3  
 3rd Qu.:75.70   3rd Qu.:228.0   3rd Qu.:  22.0  
 Max.   :89.00   Max.   :723.0   Max.   :1800.0  
 NA's   :10      NA's   :10                      
    Alcohol        percentage.expenditure  Hepatitis.B   
 Min.   : 0.0100   Min.   :    0.000      Min.   : 1.00  
 1st Qu.: 0.8775   1st Qu.:    4.685      1st Qu.:77.00  
 Median : 3.7550   Median :   64.913      Median :92.00  
 Mean   : 4.6029   Mean   :  738.251      Mean   :80.94  
 3rd Qu.: 7.7025   3rd Qu.:  441.534      3rd Qu.:97.00  
 Max.   :17.8700   Max.   :19479.912      Max.   :99.00  
 NA's   :194                              NA's   :553    
    Measles              BMI        under.five.deaths
 Min.   :     0.0   Min.   : 1.00   Min.   :   0.00  
 1st Qu.:     0.0   1st Qu.:19.30   1st Qu.:   0.00  
 Median :    17.0   Median :43.50   Median :   4.00  
 Mean   :  2419.6   Mean   :38.32   Mean   :  42.04  
 3rd Qu.:   360.2   3rd Qu.:56.20   3rd Qu.:  28.00  
 Max.   :212183.0   Max.   :87.30   Max.   :2500.00  
                    NA's   :34                       
     Polio       Total.expenditure   Diphtheria   
 Min.   : 3.00   Min.   : 0.370    Min.   : 2.00  
 1st Qu.:78.00   1st Qu.: 4.260    1st Qu.:78.00  
 Median :93.00   Median : 5.755    Median :93.00  
 Mean   :82.55   Mean   : 5.938    Mean   :82.32  
 3rd Qu.:97.00   3rd Qu.: 7.492    3rd Qu.:97.00  
 Max.   :99.00   Max.   :17.600    Max.   :99.00  
 NA's   :19      NA's   :226       NA's   :19     
    HIV.AIDS           GDP              Population       
 Min.   : 0.100   Min.   :     1.68   Min.   :3.400e+01  
 1st Qu.: 0.100   1st Qu.:   463.94   1st Qu.:1.958e+05  
 Median : 0.100   Median :  1766.95   Median :1.387e+06  
 Mean   : 1.742   Mean   :  7483.16   Mean   :1.275e+07  
 3rd Qu.: 0.800   3rd Qu.:  5910.81   3rd Qu.:7.420e+06  
 Max.   :50.600   Max.   :119172.74   Max.   :1.294e+09  
                  NA's   :448         NA's   :652        
 thinness..1.19.years thinness.5.9.years
 Min.   : 0.10        Min.   : 0.10     
 1st Qu.: 1.60        1st Qu.: 1.50     
 Median : 3.30        Median : 3.30     
 Mean   : 4.84        Mean   : 4.87     
 3rd Qu.: 7.20        3rd Qu.: 7.20     
 Max.   :27.70        Max.   :28.60     
 NA's   :34           NA's   :34        
 Income.composition.of.resources   Schooling    
 Min.   :0.0000                  Min.   : 0.00  
 1st Qu.:0.4930                  1st Qu.:10.10  
 Median :0.6770                  Median :12.30  
 Mean   :0.6276                  Mean   :11.99  
 3rd Qu.:0.7790                  3rd Qu.:14.30  
 Max.   :0.9480                  Max.   :20.70  
 NA's   :167                     NA's   :163    
# Scatterplot of life expectancy (y) against BMI (x) to eyeball the relationship.
plot(
  Life_Exp$BMI,
  Life_Exp$Life.expectancy,
  main = "Life Expectancy vs BMI"
)

Predictor: BMI is used as the single explanatory variable for life expectancy.

# Linear Regression ----
# Fit a simple linear model with life expectancy as the response and BMI as
# the only predictor, then print the coefficient table and fit statistics.
LE_lm <- lm(
  formula = Life.expectancy ~ BMI,
  data = Life_Exp
)
summary(object = LE_lm)

Call:
lm(formula = Life.expectancy ~ BMI, data = Life_Exp)

Residuals:
    Min      1Q  Median      3Q     Max 
-34.660  -4.663   0.353   4.466  28.105 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 59.032496   0.313745   188.2   <2e-16 ***
BMI          0.269858   0.007274    37.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.812 on 2894 degrees of freedom
  (42 observations deleted due to missingness)
Multiple R-squared:  0.3223,    Adjusted R-squared:  0.322 
F-statistic:  1376 on 1 and 2894 DF,  p-value: < 2.2e-16

The p-value is less than 0.05, so we can reject the null hypothesis and conclude that the model is statistically significant. If the regression line is a good fit for the data, we would expect the residuals to be normally distributed around a mean of zero. A good model would tend to have a median residual near zero, minimum and maximum values of roughly the same magnitude, and first and third quartile values of roughly the same magnitude. The reported R-squared value (0.3223) shows that this model accounts for about 32% of the variation in life expectancy.

# Redraw the BMI vs. life expectancy scatterplot and overlay the fitted
# regression line taken from the LE_lm model object.
plot(
  Life_Exp$BMI,
  Life_Exp$Life.expectancy,
  main = "Life Expectancy vs BMI"
)
abline(reg = LE_lm)

# Residual Plot ----
# Plot the model residuals against the fitted values. Readable axis labels
# replace the default deparsed expressions.
plot(
  fitted(LE_lm), resid(LE_lm),
  xlab = "Fitted values",
  ylab = "Residuals",
  main = "Residuals vs Fitted"
)
# Dashed reference line at zero so the scatter above and below 0 can be
# judged directly on the plot.
abline(h = 0, lty = 2)

The residual plot should show points scattered evenly above and below 0, with no visible pattern. Each residual is the vertical distance between an observed value and the regression line.

# Normal Q-Q plot of the model residuals, used to check the normality
# assumption of the linear regression.
qqnorm(resid(LE_lm))
# Add the reference line (by default drawn through the first and third
# quartiles) so departures from a straight line are easy to see.
qqline(resid(LE_lm))

If the residuals are normally distributed, the points in the Q-Q plot should fall along a straight line.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CkxpZmVfRXhwPC1yZWFkLmNzdigiL1VzZXJzL2NocmlzdGluYWthc21hbi9Eb3dubG9hZHMvTGlmZSBFeHBlY3RhbmN5IERhdGEuY3N2IikKc3VtbWFyeShMaWZlX0V4cCkKYGBgCgpgYGB7cn0KcGxvdCh4PUxpZmVfRXhwJEJNSSwgeT1MaWZlX0V4cCRMaWZlLmV4cGVjdGFuY3ksIG1haW49IkxpZmUgRXhwZWN0YW5jeSB2cyBCTUkiKQoKYGBgCmAKClByZWRpY3RvciA9IEJNSSB0byBleHBsYWluIGxpZmUgZXhwZWN0YW5jeQpgYGB7cn0KI0xpbmVhciBSZWdyZXNzaW9uCkxFX2xtPC0gbG0oTGlmZS5leHBlY3RhbmN5fkJNSSwgZGF0YT1MaWZlX0V4cCkKc3VtbWFyeShMRV9sbSkKYGBgClRoZSBwIHZhbHVlIGlzIGxlc3MgdGhhbiAwLjA1IHNvIHdlIGNhbiByZWplY3QgdGhlIG51bGwgaHlwb3RoZXNpcyBhbmQgY29uY2x1ZGUgdGhlIG1vZGVsIGlzIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQuCklmIHRoZSByZWdyZXNzaW9uIGxpbmUgaXMgYSBnb29kIGZpdCB3aXRoIHRoZSBkYXRhLCB3ZSB3b3VsZCBleHBlY3QgcmVzaWR1YWwgdmFsdWVzIHRoYXQgYXJlIG5vcm1hbGx5IGRpc3RyaWJ1dGVkIGFyb3VuZCBhIG1lYW4gb2YgemVyby4gYSBnb29kIG1vZGVsIHdvdWxkIHRlbmQgdG8gaGF2ZSBhIG1lZGlhbiB2YWx1ZSBuZWFyIHplcm8sIG1pbmltdW0gYW5kIG1heGltdW0gdmFsdWVzIG9mIHJvdWdobHkgdGhlc2FtZSBtYWduaXR1ZGUsIGFuZCBmaXJzdCBhbmQgdGhpcmQgcXVhcnRpbGUgdmFsdWVzIG9mIHJvdWdobHkgdGhlIHNhbWUgbWFnbml0dWRlLgpUaGUgcmVwb3J0ZWQgUiBzcXVhcmVkIHZhbHVlIHNob3dzIHRoYXQgdGhpcyBtb2RlbCBhY2NvdW50cyBmb3IgNjUlIG9mIHRoZSBkYXRhJ3MgdmFyaWF0aW9uLgoKCmBgYHtyfQpwbG90KHg9TGlmZV9FeHAkQk1JLCB5PUxpZmVfRXhwJExpZmUuZXhwZWN0YW5jeSwgbWFpbj0iTGlmZSBFeHBlY3RhbmN5IHZzIEJNSSIpCmFibGluZShMRV9sbSkKYGBgCgpgYGB7cn0KI1Jlc2lkdWFsIFBsb3QKcGxvdChmaXR0ZWQoTEVfbG0pLHJlc2lkKExFX2xtKSkKYGBgCgpUaGUgcmVzaWR1YWwgcGxvdCBzaG91bGQgc2hvdyBwb2ludHMgc2NhdHRlcmVkIGV2ZW5seSBhYm92ZSBhbmQgYmVsb3cgMC4gVGhlIHJlc2lkdWFscyBhcmUgaG93IGZhciB0aGUgYWN0dWFsIHZhbHVlcyBhcmUgZnJvbSB0aGUgcmVncmVzc2lvbiBsaW5lLgoKCmBgYHtyfQpxcW5vcm0ocmVzaWQoTEVfbG0pKQpgYGAKSWYgcmVzaWR1YWxzIGFyZSBub3JtYWxseSBkaXN0cmlidXRlZCwgdGhlIHZhbHVlcyBzaG91bGQgZm9sbG93IGEgc3RyYWlnaHQgbGluZQo=