Libraries

library(dplyr)

Simple logistic regression

df <- read.csv("LogisticRegression.csv")
names(df)
## [1] "CRP"          "Triglyceride" "Cholesterol"  "Improvement"  "Group"
table(df$Improvement)
## 
##  No Yes 
## 156 144
levels(df$Improvement)
## [1] "No"  "Yes"
crp_outcome <- glm(Improvement ~ CRP,  # Formula
                   data = df,  # Data object
                   family = "binomial")  # Binomial for dichotomous outcome
summary(crp_outcome)
## 
## Call:
## glm(formula = Improvement ~ CRP, family = "binomial", data = df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.248  -1.145  -1.075   1.208   1.308  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.31252    0.56380   0.554    0.579
## CRP         -0.03983    0.05601  -0.711    0.477
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 414.90  on 298  degrees of freedom
## AIC: 418.9
## 
## Number of Fisher Scoring iterations: 3
exp(-0.03983)
## [1] 0.9609528
exp(confint(crp_outcome))
## Waiting for profiling to be done...
##                 2.5 %   97.5 %
## (Intercept) 0.4530342 4.159610
## CRP         0.8602580 1.072253

Multivariable logistic regression

crp_chol_outcome <- glm(Improvement ~ CRP + Cholesterol,
                        data = df,
                        family = "binomial")
summary(crp_chol_outcome)
## 
## Call:
## glm(formula = Improvement ~ CRP + Cholesterol, family = "binomial", 
##     data = df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.237  -1.151  -1.014   1.182   1.408  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.28794    0.56547   0.509    0.611
## CRP         -0.00745    0.06351  -0.117    0.907
## Cholesterol -0.05298    0.04891  -1.083    0.279
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 413.72  on 297  degrees of freedom
## AIC: 419.72
## 
## Number of Fisher Scoring iterations: 3

Adding a dichotomous nominal variable to the model

table(df$Group)
## 
##   A   C 
## 150 150
df$Group <- factor(df$Group,
                   levels = c("C", "A"))  # Listed in reverse order
table(df$Group)
## 
##   C   A 
## 150 150
crp_chol_group_outcome <- glm(Improvement ~ CRP + Cholesterol + Group,
                              data = df,
                              family = "binomial")
summary(crp_chol_group_outcome)
## 
## Call:
## glm(formula = Improvement ~ CRP + Cholesterol + Group, family = "binomial", 
##     data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3802  -1.0748  -0.8885   1.1521   1.4931  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.037206   0.583715  -0.064  0.94918   
## CRP         -0.003009   0.064343  -0.047  0.96270   
## Cholesterol -0.062970   0.049741  -1.266  0.20553   
## GroupA       0.669787   0.235858   2.840  0.00451 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 405.53  on 296  degrees of freedom
## AIC: 413.53
## 
## Number of Fisher Scoring iterations: 4
exp(0.669787)  # Exponent of the coefficient of the Group variable
## [1] 1.953821
exp(confint(crp_chol_group_outcome))
## Waiting for profiling to be done...
##                 2.5 %   97.5 %
## (Intercept) 0.3058754 3.037067
## CRP         0.8783664 1.131252
## Cholesterol 0.8509512 1.034644
## GroupA      1.2338960 3.113735

Adding a multi-level nominal categorical variable to the model

set.seed(1)  # For reproducible results
df <- mutate(df,
             Stage = sample(c("I", "II", "III", "IV"),
                            size = 300,
                            replace = TRUE))
names(df)  # List the variables
## [1] "CRP"          "Triglyceride" "Cholesterol"  "Improvement"  "Group"       
## [6] "Stage"
table(df$Stage)
## 
##   I  II III  IV 
##  79  81  67  73
df$Stage <- factor(df$Stage,
                   levels = c("I", "II", "III", "IV"))
levels(df$Stage)
## [1] "I"   "II"  "III" "IV"
crp_chol_group_stage_outcome <- glm(Improvement ~ CRP + Cholesterol + Group + Stage,
                                    data = df,
                                    family = "binomial")
summary(crp_chol_group_stage_outcome)
## 
## Call:
## glm(formula = Improvement ~ CRP + Cholesterol + Group + Stage, 
##     family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4592  -1.1262  -0.8634   1.1537   1.5693  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.107254   0.610740   0.176  0.86060   
## CRP          0.003433   0.064995   0.053  0.95788   
## Cholesterol -0.065287   0.050026  -1.305  0.19188   
## GroupA       0.664812   0.239848   2.772  0.00557 **
## StageII     -0.115640   0.324237  -0.357  0.72135   
## StageIII    -0.316224   0.340689  -0.928  0.35331   
## StageIV     -0.375277   0.332166  -1.130  0.25857   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 403.89  on 293  degrees of freedom
## AIC: 417.89
## 
## Number of Fisher Scoring iterations: 4

Forward and backward stepwise methods

simple_model <- glm(Improvement ~ 1,
                    data = df,
                    family = "binomial")
summary(simple_model)
## 
## Call:
## glm(formula = Improvement ~ 1, family = "binomial", data = df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.144  -1.144  -1.144   1.212   1.212  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.08004    0.11556  -0.693    0.489
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 415.41  on 299  degrees of freedom
## AIC: 417.41
## 
## Number of Fisher Scoring iterations: 3
step(simple_model,
     direction = "forward",
     scope = formula(crp_chol_group_stage_outcome))
## Start:  AIC=417.41
## Improvement ~ 1
## 
##               Df Deviance    AIC
## + Group        1   407.68 411.68
## <none>             415.41 417.41
## + Cholesterol  1   413.74 417.74
## + CRP          1   414.90 418.90
## + Stage        3   413.34 421.34
## 
## Step:  AIC=411.68
## Improvement ~ Group
## 
##               Df Deviance    AIC
## + Cholesterol  1   405.53 411.53
## <none>             407.68 411.68
## + CRP          1   407.15 413.15
## + Stage        3   406.02 416.02
## 
## Step:  AIC=411.53
## Improvement ~ Group + Cholesterol
## 
##         Df Deviance    AIC
## <none>       405.53 411.53
## + CRP    1   405.53 413.53
## + Stage  3   403.89 415.89
## 
## Call:  glm(formula = Improvement ~ Group + Cholesterol, family = "binomial", 
##     data = df)
## 
## Coefficients:
## (Intercept)       GroupA  Cholesterol  
##    -0.06095      0.67006     -0.06406  
## 
## Degrees of Freedom: 299 Total (i.e. Null);  297 Residual
## Null Deviance:       415.4 
## Residual Deviance: 405.5     AIC: 411.5
best_model_forward <- glm(Improvement ~ Group + Cholesterol,
                          data = df,
                          family = "binomial")
summary(best_model_forward)
## 
## Call:
## glm(formula = Improvement ~ Group + Cholesterol, family = "binomial", 
##     data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3844  -1.0753  -0.8903   1.1535   1.4918  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.06095    0.28786  -0.212  0.83231   
## GroupA       0.67006    0.23579   2.842  0.00449 **
## Cholesterol -0.06406    0.04399  -1.456  0.14534   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.41  on 299  degrees of freedom
## Residual deviance: 405.53  on 297  degrees of freedom
## AIC: 411.53
## 
## Number of Fisher Scoring iterations: 4
step(crp_chol_group_stage_outcome,
     direction = "backward")
## Start:  AIC=417.89
## Improvement ~ CRP + Cholesterol + Group + Stage
## 
##               Df Deviance    AIC
## - Stage        3   405.53 413.53
## - CRP          1   403.89 415.89
## - Cholesterol  1   405.60 417.60
## <none>             403.89 417.89
## - Group        1   411.69 423.69
## 
## Step:  AIC=413.53
## Improvement ~ CRP + Cholesterol + Group
## 
##               Df Deviance    AIC
## - CRP          1   405.53 411.53
## - Cholesterol  1   407.15 413.15
## <none>             405.53 413.53
## - Group        1   413.72 419.72
## 
## Step:  AIC=411.53
## Improvement ~ Cholesterol + Group
## 
##               Df Deviance    AIC
## <none>             405.53 411.53
## - Cholesterol  1   407.68 411.68
## - Group        1   413.74 417.74
## 
## Call:  glm(formula = Improvement ~ Cholesterol + Group, family = "binomial", 
##     data = df)
## 
## Coefficients:
## (Intercept)  Cholesterol       GroupA  
##    -0.06095     -0.06406      0.67006  
## 
## Degrees of Freedom: 299 Total (i.e. Null);  297 Residual
## Null Deviance:       415.4 
## Residual Deviance: 405.5     AIC: 411.5