#Using the “swiss” dataset, build the best multiple regression model you can for the variable Fertility.

# Bind the built-in swiss data set to `df` and print the pairwise
# correlations between all of its columns.
df <- datasets::swiss
df |>
  cor()
##                   Fertility Agriculture Examination   Education   Catholic
## Fertility         1.0000000  0.35307918  -0.6458827 -0.66378886  0.4636847
## Agriculture       0.3530792  1.00000000  -0.6865422 -0.63952252  0.4010951
## Examination      -0.6458827 -0.68654221   1.0000000  0.69841530 -0.5727418
## Education        -0.6637889 -0.63952252   0.6984153  1.00000000 -0.1538589
## Catholic          0.4636847  0.40109505  -0.5727418 -0.15385892  1.0000000
## Infant.Mortality  0.4165560 -0.06085861  -0.1140216 -0.09932185  0.1754959
##                  Infant.Mortality
## Fertility              0.41655603
## Agriculture           -0.06085861
## Examination           -0.11402160
## Education             -0.09932185
## Catholic               0.17549591
## Infant.Mortality       1.00000000

Based on the correlation matrix, Fertility is most strongly correlated with Examination (-0.65) and Education (-0.66), both negatively, so the plan is to start with a multiple regression model that uses both features.

# Regress Fertility on Examination, Education and their interaction,
# then print the model summary.
lm(Fertility ~ Examination * Education, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Examination * Education, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.522  -6.901  -1.169   7.383  19.412 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           87.178104   4.334560  20.112   <2e-16 ***
## Examination           -0.625731   0.257117  -2.434   0.0192 *  
## Education             -0.807552   0.463474  -1.742   0.0886 .  
## Examination:Education  0.009201   0.014451   0.637   0.5277    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.043 on 43 degrees of freedom
## Multiple R-squared:  0.5101, Adjusted R-squared:  0.4759 
## F-statistic: 14.92 on 3 and 43 DF,  p-value: 8.437e-07
# Single-predictor fit: Fertility explained by Examination alone.
lm(Fertility ~ Examination, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Examination, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.9375  -6.0044  -0.3393   7.9239  19.7399 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  86.8185     3.2576  26.651  < 2e-16 ***
## Examination  -1.0113     0.1782  -5.675 9.45e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.642 on 45 degrees of freedom
## Multiple R-squared:  0.4172, Adjusted R-squared:  0.4042 
## F-statistic: 32.21 on 1 and 45 DF,  p-value: 9.45e-07
# Single-predictor fit: Fertility explained by Education alone.
lm(Fertility ~ Education, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Education, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.036  -6.711  -1.011   9.526  19.689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  79.6101     2.1041  37.836  < 2e-16 ***
## Education    -0.8624     0.1448  -5.954 3.66e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.446 on 45 degrees of freedom
## Multiple R-squared:  0.4406, Adjusted R-squared:  0.4282 
## F-statistic: 35.45 on 1 and 45 DF,  p-value: 3.659e-07

Using these two features, the R-squared value is low (at most 0.51, and the interaction term is not significant, p = 0.53), so the other features will be tested as well.

# Single-predictor fit: Fertility explained by Agriculture alone.
lm(Fertility ~ Agriculture, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Agriculture, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5374  -7.8685  -0.6362   9.0464  24.4858 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 60.30438    4.25126  14.185   <2e-16 ***
## Agriculture  0.19420    0.07671   2.532   0.0149 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.82 on 45 degrees of freedom
## Multiple R-squared:  0.1247, Adjusted R-squared:  0.1052 
## F-statistic: 6.409 on 1 and 45 DF,  p-value: 0.01492
# Examination-only fit, shown again here for side-by-side comparison with the
# other single-predictor models (same call as earlier in the document).
lm(Fertility ~ Examination, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Examination, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.9375  -6.0044  -0.3393   7.9239  19.7399 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  86.8185     3.2576  26.651  < 2e-16 ***
## Examination  -1.0113     0.1782  -5.675 9.45e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.642 on 45 degrees of freedom
## Multiple R-squared:  0.4172, Adjusted R-squared:  0.4042 
## F-statistic: 32.21 on 1 and 45 DF,  p-value: 9.45e-07
# Education-only fit, shown again here for side-by-side comparison with the
# other single-predictor models (same call as earlier in the document).
lm(Fertility ~ Education, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Education, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.036  -6.711  -1.011   9.526  19.689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  79.6101     2.1041  37.836  < 2e-16 ***
## Education    -0.8624     0.1448  -5.954 3.66e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.446 on 45 degrees of freedom
## Multiple R-squared:  0.4406, Adjusted R-squared:  0.4282 
## F-statistic: 35.45 on 1 and 45 DF,  p-value: 3.659e-07
# Single-predictor fit: Fertility explained by Catholic alone.
lm(Fertility ~ Catholic, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Catholic, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.309  -4.060   0.511   6.851  16.682 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 64.42826    2.30510  27.950  < 2e-16 ***
## Catholic     0.13889    0.03956   3.511  0.00103 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.19 on 45 degrees of freedom
## Multiple R-squared:  0.215,  Adjusted R-squared:  0.1976 
## F-statistic: 12.33 on 1 and 45 DF,  p-value: 0.001029
# Single-predictor fit: Fertility explained by Infant.Mortality alone.
lm(Fertility ~ Infant.Mortality, data = df) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Infant.Mortality, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.672  -5.687  -0.381   7.239  28.565 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       34.5155    11.7113   2.947  0.00507 **
## Infant.Mortality   1.7865     0.5812   3.074  0.00359 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.48 on 45 degrees of freedom
## Multiple R-squared:  0.1735, Adjusted R-squared:  0.1552 
## F-statistic: 9.448 on 1 and 45 DF,  p-value: 0.003585
# Fit Fertility on all five predictors crossed with `*`, i.e. every main
# effect plus every two- to five-way interaction, then print the summary.
# NOTE(review): that is 32 coefficients including the intercept; the recorded
# summary reports only 15 residual degrees of freedom for this fit.
lm(
  Fertility ~ Agriculture * Examination * Education * Catholic *
    Infant.Mortality,
  data = df
) |>
  summary()
## 
## Call:
## lm(formula = Fertility ~ Agriculture * Examination * Education * 
##     Catholic * Infant.Mortality, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7861 -2.6263  0.0973  2.3639  9.5548 
## 
## Coefficients:
##                                                               Estimate
## (Intercept)                                                  1.017e+03
## Agriculture                                                 -1.312e+01
## Examination                                                 -2.403e+01
## Education                                                   -9.896e+01
## Catholic                                                    -1.006e+01
## Infant.Mortality                                            -4.174e+01
## Agriculture:Examination                                      2.071e-01
## Agriculture:Education                                        1.387e+00
## Examination:Education                                        2.721e+00
## Agriculture:Catholic                                         1.353e-01
## Examination:Catholic                                         4.886e-01
## Education:Catholic                                           9.542e-01
## Agriculture:Infant.Mortality                                 5.675e-01
## Examination:Infant.Mortality                                 9.692e-01
## Education:Infant.Mortality                                   4.511e+00
## Catholic:Infant.Mortality                                    4.584e-01
## Agriculture:Examination:Education                           -2.933e-02
## Agriculture:Examination:Catholic                            -5.551e-03
## Agriculture:Education:Catholic                              -1.066e-02
## Examination:Education:Catholic                              -2.550e-02
## Agriculture:Examination:Infant.Mortality                    -6.727e-03
## Agriculture:Education:Infant.Mortality                      -6.230e-02
## Examination:Education:Infant.Mortality                      -1.188e-01
## Agriculture:Catholic:Infant.Mortality                       -5.830e-03
## Examination:Catholic:Infant.Mortality                       -2.029e-02
## Education:Catholic:Infant.Mortality                         -4.291e-02
## Agriculture:Examination:Education:Catholic                   9.402e-06
## Agriculture:Examination:Education:Infant.Mortality           1.176e-03
## Agriculture:Examination:Catholic:Infant.Mortality            1.984e-04
## Agriculture:Education:Catholic:Infant.Mortality              4.408e-04
## Examination:Education:Catholic:Infant.Mortality              9.393e-04
## Agriculture:Examination:Education:Catholic:Infant.Mortality  6.292e-06
##                                                             Std. Error t value
## (Intercept)                                                  1.106e+03   0.919
## Agriculture                                                  1.721e+01  -0.762
## Examination                                                  4.747e+01  -0.506
## Education                                                    8.530e+01  -1.160
## Catholic                                                     1.740e+01  -0.578
## Infant.Mortality                                             5.360e+01  -0.779
## Agriculture:Examination                                      7.504e-01   0.276
## Agriculture:Education                                        1.341e+00   1.034
## Examination:Education                                        3.040e+00   0.895
## Agriculture:Catholic                                         2.399e-01   0.564
## Examination:Catholic                                         9.428e-01   0.518
## Education:Catholic                                           1.745e+00   0.547
## Agriculture:Infant.Mortality                                 8.337e-01   0.681
## Examination:Infant.Mortality                                 2.312e+00   0.419
## Education:Infant.Mortality                                   4.113e+00   1.097
## Catholic:Infant.Mortality                                    8.504e-01   0.539
## Agriculture:Examination:Education                            4.995e-02  -0.587
## Agriculture:Examination:Catholic                             1.306e-02  -0.425
## Agriculture:Education:Catholic                               2.238e-02  -0.476
## Examination:Education:Catholic                               6.952e-02  -0.367
## Agriculture:Examination:Infant.Mortality                     3.680e-02  -0.183
## Agriculture:Education:Infant.Mortality                       6.448e-02  -0.966
## Examination:Education:Infant.Mortality                       1.466e-01  -0.811
## Agriculture:Catholic:Infant.Mortality                        1.174e-02  -0.497
## Examination:Catholic:Infant.Mortality                        4.675e-02  -0.434
## Education:Catholic:Infant.Mortality                          8.466e-02  -0.507
## Agriculture:Examination:Education:Catholic                   9.692e-04   0.010
## Agriculture:Examination:Education:Infant.Mortality           2.446e-03   0.481
## Agriculture:Examination:Catholic:Infant.Mortality            6.505e-04   0.305
## Agriculture:Education:Catholic:Infant.Mortality              1.104e-03   0.399
## Examination:Education:Catholic:Infant.Mortality              3.469e-03   0.271
## Agriculture:Examination:Education:Catholic:Infant.Mortality  4.968e-05   0.127
##                                                             Pr(>|t|)
## (Intercept)                                                    0.372
## Agriculture                                                    0.458
## Examination                                                    0.620
## Education                                                      0.264
## Catholic                                                       0.572
## Infant.Mortality                                               0.448
## Agriculture:Examination                                        0.786
## Agriculture:Education                                          0.317
## Examination:Education                                          0.385
## Agriculture:Catholic                                           0.581
## Examination:Catholic                                           0.612
## Education:Catholic                                             0.593
## Agriculture:Infant.Mortality                                   0.506
## Examination:Infant.Mortality                                   0.681
## Education:Infant.Mortality                                     0.290
## Catholic:Infant.Mortality                                      0.598
## Agriculture:Examination:Education                              0.566
## Agriculture:Examination:Catholic                               0.677
## Agriculture:Education:Catholic                                 0.641
## Examination:Education:Catholic                                 0.719
## Agriculture:Examination:Infant.Mortality                       0.857
## Agriculture:Education:Infant.Mortality                         0.349
## Examination:Education:Infant.Mortality                         0.430
## Agriculture:Catholic:Infant.Mortality                          0.627
## Examination:Catholic:Infant.Mortality                          0.671
## Education:Catholic:Infant.Mortality                            0.620
## Agriculture:Examination:Education:Catholic                     0.992
## Agriculture:Examination:Education:Infant.Mortality             0.638
## Agriculture:Examination:Catholic:Infant.Mortality              0.765
## Agriculture:Education:Catholic:Infant.Mortality                0.695
## Examination:Education:Catholic:Infant.Mortality                0.790
## Agriculture:Examination:Education:Catholic:Infant.Mortality    0.901
## 
## Residual standard error: 7.165 on 15 degrees of freedom
## Multiple R-squared:  0.8927, Adjusted R-squared:  0.671 
## F-statistic: 4.027 on 31 and 15 DF,  p-value: 0.003078

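Using any one feature, the R-squared value was about 0.5 or less, but using all features (with every interaction between them) we get an R-squared value of 0.8927. So considering all features does improve the fit for Fertility. However, this model estimates 32 coefficients from only 47 observations, none of the individual terms is significant, and the adjusted R-squared is only 0.671, so much of the gain in R-squared reflects overfitting; a simpler model using all features without the higher-order interactions should be compared before settling on this one.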

#Then build a logistic regression model for predicting Fertility > 70.0.

# Binary response: 1 when an observation's Fertility exceeds 70, otherwise 0.
above_70 <- as.numeric(df$Fertility > 70)

# Logistic regression on the main effects of all five predictors.
# The previous formula crossed every predictor with `*` (32 coefficients for
# 47 observations). That fit separated the two classes perfectly, so glm.fit
# did not converge and every coefficient came out with z = 0 and p = 1.
log_mod <- glm(
  above_70 ~ Agriculture + Examination + Education + Catholic +
    Infant.Mortality,
  family = "binomial",
  data = df
)
# NOTE(review): the recorded summary output below still belongs to the old
# full-interaction fit; re-knit the document to refresh it.
summary(log_mod)
## 
## Call:
## glm(formula = above_70 ~ Agriculture * Examination * Education * 
##     Catholic * Infant.Mortality, family = "binomial", data = df)
## 
## Coefficients:
##                                                               Estimate
## (Intercept)                                                  1.649e+04
## Agriculture                                                 -2.411e+02
## Examination                                                 -7.073e+02
## Education                                                   -6.000e+02
## Catholic                                                    -1.542e+02
## Infant.Mortality                                            -8.121e+02
## Agriculture:Examination                                      1.010e+01
## Agriculture:Education                                        2.212e+00
## Examination:Education                                        2.706e+01
## Agriculture:Catholic                                         2.795e+00
## Examination:Catholic                                        -2.431e+00
## Education:Catholic                                           1.880e+01
## Agriculture:Infant.Mortality                                 1.206e+01
## Examination:Infant.Mortality                                 3.557e+01
## Education:Infant.Mortality                                   3.225e+01
## Catholic:Infant.Mortality                                    8.459e+00
## Agriculture:Examination:Education                           -7.973e-02
## Agriculture:Examination:Catholic                             1.879e-02
## Agriculture:Education:Catholic                              -3.122e-01
## Examination:Education:Catholic                              -5.716e-01
## Agriculture:Examination:Infant.Mortality                    -5.215e-01
## Agriculture:Education:Infant.Mortality                      -1.838e-01
## Examination:Education:Infant.Mortality                      -1.468e+00
## Agriculture:Catholic:Infant.Mortality                       -1.534e-01
## Examination:Catholic:Infant.Mortality                        3.109e-02
## Education:Catholic:Infant.Mortality                         -1.003e+00
## Agriculture:Examination:Education:Catholic                   1.083e-02
## Agriculture:Examination:Education:Infant.Mortality           7.705e-03
## Agriculture:Examination:Catholic:Infant.Mortality            6.442e-04
## Agriculture:Education:Catholic:Infant.Mortality              1.698e-02
## Examination:Education:Catholic:Infant.Mortality              3.285e-02
## Agriculture:Examination:Education:Catholic:Infant.Mortality -6.260e-04
##                                                             Std. Error z value
## (Intercept)                                                  8.158e+07       0
## Agriculture                                                  1.152e+06       0
## Examination                                                  4.495e+06       0
## Education                                                    6.117e+06       0
## Catholic                                                     1.352e+06       0
## Infant.Mortality                                             3.900e+06       0
## Agriculture:Examination                                      5.788e+04       0
## Agriculture:Education                                        8.469e+04       0
## Examination:Education                                        2.675e+05       0
## Agriculture:Catholic                                         1.622e+04       0
## Examination:Catholic                                         1.535e+05       0
## Education:Catholic                                           1.534e+05       0
## Agriculture:Infant.Mortality                                 5.281e+04       0
## Examination:Infant.Mortality                                 2.215e+05       0
## Education:Infant.Mortality                                   2.886e+05       0
## Catholic:Infant.Mortality                                    6.230e+04       0
## Agriculture:Examination:Education                            2.888e+03       0
## Agriculture:Examination:Catholic                             1.823e+03       0
## Agriculture:Education:Catholic                               3.479e+03       0
## Examination:Education:Catholic                               4.705e+03       0
## Agriculture:Examination:Infant.Mortality                     2.682e+03       0
## Agriculture:Education:Infant.Mortality                       3.866e+03       0
## Examination:Education:Infant.Mortality                       1.257e+04       0
## Agriculture:Catholic:Infant.Mortality                        7.924e+02       0
## Examination:Catholic:Infant.Mortality                        6.624e+03       0
## Education:Catholic:Infant.Mortality                          7.933e+03       0
## Agriculture:Examination:Education:Catholic                   1.368e+02       0
## Agriculture:Examination:Education:Infant.Mortality           1.200e+02       0
## Agriculture:Examination:Catholic:Infant.Mortality            7.014e+01       0
## Agriculture:Education:Catholic:Infant.Mortality              1.868e+02       0
## Examination:Education:Catholic:Infant.Mortality              2.623e+02       0
## Agriculture:Examination:Education:Catholic:Infant.Mortality  8.210e+00       0
##                                                             Pr(>|z|)
## (Intercept)                                                        1
## Agriculture                                                        1
## Examination                                                        1
## Education                                                          1
## Catholic                                                           1
## Infant.Mortality                                                   1
## Agriculture:Examination                                            1
## Agriculture:Education                                              1
## Examination:Education                                              1
## Agriculture:Catholic                                               1
## Examination:Catholic                                               1
## Education:Catholic                                                 1
## Agriculture:Infant.Mortality                                       1
## Examination:Infant.Mortality                                       1
## Education:Infant.Mortality                                         1
## Catholic:Infant.Mortality                                          1
## Agriculture:Examination:Education                                  1
## Agriculture:Examination:Catholic                                   1
## Agriculture:Education:Catholic                                     1
## Examination:Education:Catholic                                     1
## Agriculture:Examination:Infant.Mortality                           1
## Agriculture:Education:Infant.Mortality                             1
## Examination:Education:Infant.Mortality                             1
## Agriculture:Catholic:Infant.Mortality                              1
## Examination:Catholic:Infant.Mortality                              1
## Education:Catholic:Infant.Mortality                                1
## Agriculture:Examination:Education:Catholic                         1
## Agriculture:Examination:Education:Infant.Mortality                 1
## Agriculture:Examination:Catholic:Infant.Mortality                  1
## Agriculture:Education:Catholic:Infant.Mortality                    1
## Examination:Education:Catholic:Infant.Mortality                    1
## Agriculture:Examination:Education:Catholic:Infant.Mortality        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6.5135e+01  on 46  degrees of freedom
## Residual deviance: 2.0699e-09  on 15  degrees of freedom
## AIC: 64
## 
## Number of Fisher Scoring iterations: 25
# Plot the 0/1 outcome against a predictor and overlay a one-variable
# logistic fit. The x-axis used to be Fertility itself; above_70 is defined
# directly from Fertility, so that fit was perfectly separated and glm.fit
# failed to converge. Education is used instead because it has the strongest
# correlation with Fertility in the correlation matrix at the top of the
# document.
ggplot(df, aes(x = Education, y = above_70)) +
  geom_point() +
  stat_smooth(
    method = "glm", color = "green", se = FALSE,
    method.args = list(family = binomial)
  ) +
  labs(
    title = "Logistic Regression for Fertility > 70",
    x = "Education",
    y = "Probability of Fertility > 70"
  )
## `geom_smooth()` using formula = 'y ~ x'