#load packages:

library(gmodels)
## Warning: package 'gmodels' was built under R version 4.0.5
library(mctest)

Load and Clean the data:

# Import the COPD student dataset and print its structure so the column
# types can be checked before any cleaning is done.
COPD <- read.csv(
  file = "VYLgxg9YEemU7w7-EFnPcg_5645e2700f5811e9b2f4133a1edfbb40_COPD_student_dataset.csv"
)
str(object = COPD)
## 'data.frame':    101 obs. of  24 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ ID          : int  58 57 62 145 136 84 93 27 114 152 ...
##  $ AGE         : int  77 79 80 56 65 67 67 83 72 75 ...
##  $ PackHistory : num  60 50 11 60 68 26 50 90 50 6 ...
##  $ COPDSEVERITY: chr  "SEVERE" "MODERATE" "MODERATE" "VERY SEVERE" ...
##  $ MWT1        : int  120 165 201 210 204 216 214 214 231 226 ...
##  $ MWT2        : int  120 176 180 210 210 180 237 237 237 240 ...
##  $ MWT1Best    : int  120 176 201 210 210 216 237 237 237 240 ...
##  $ FEV1        : num  1.21 1.09 1.52 0.47 1.07 1.09 0.69 0.68 2.13 1.06 ...
##  $ FEV1PRED    : num  36 56 68 14 42 50 35 32 63 46 ...
##  $ FVC         : num  2.4 1.64 2.3 1.14 2.91 1.99 1.31 2.23 4.38 2.06 ...
##  $ FVCPRED     : int  98 65 86 27 98 60 48 77 80 75 ...
##  $ CAT         : int  25 12 22 28 32 29 29 22 25 31 ...
##  $ HAD         : num  8 21 18 26 18 21 30 2 6 20 ...
##  $ SGRQ        : num  69.5 44.2 44.1 62 75.6 ...
##  $ AGEquartiles: int  4 4 4 1 1 2 2 4 3 3 ...
##  $ copd        : int  3 2 2 4 3 2 3 3 2 3 ...
##  $ gender      : int  1 0 0 1 1 0 0 1 1 0 ...
##  $ smoking     : int  2 2 2 2 2 1 1 2 1 2 ...
##  $ Diabetes    : int  1 1 1 0 0 1 1 1 1 0 ...
##  $ muscular    : int  0 0 0 0 1 0 0 0 0 1 ...
##  $ hypertension: int  0 0 0 1 1 0 0 0 0 0 ...
##  $ AtrialFib   : int  1 1 1 1 0 1 1 1 1 0 ...
##  $ IHD         : int  0 1 0 0 0 0 0 0 0 0 ...
# Convert the categorical variables (stored as character or integer codes) to factors:
COPD$COPDSEVERITY <- as.factor(COPD$COPDSEVERITY)
COPD$AGEquartiles <- as.factor(COPD$AGEquartiles)
COPD$copd <- as.factor(COPD$copd)
COPD$gender <- as.factor(COPD$gender)
# Recode smoking from 1/2 to 1/0 (0: ex-smoker, 1: current smoker):
COPD$smoking[COPD$smoking == 2] <- 0
COPD$smoking <- factor(COPD$smoking)
# Create a new variable "comorbid" as a substitute for the individual comorbidity
# variables (0: no comorbidities, 1: one or more comorbidities).
# The flag is built as a full-length vector in one step. Seeding it with
# length(COPD$Diabetes) would create the single number 101, which survives as a
# bogus value in position 1 whenever the first patient has no comorbidity.
has_comorbidity <- COPD$Diabetes == 1 | COPD$hypertension == 1 |
  COPD$AtrialFib == 1 | COPD$IHD == 1 | COPD$muscular == 1
# `%in% TRUE` maps both FALSE and NA to 0, so a patient whose status cannot be
# determined is counted as "no comorbidity", as before.
comorbid <- factor(as.integer(has_comorbidity %in% TRUE))
COPD$comorbid <- comorbid

About the data:

It is a dataset of 101 COPD patients. Outcome: quality of life (SGRQ). Purpose of the model: to understand the variables affecting the quality of life of COPD patients. Candidate predictors: patient characteristics (age, gender, smoking, pack history), lung function (FEV1, FEV1PRED, FVC, FVCPRED), severity (COPDSEVERITY, CAT), and comorbidities.

Explore the data:

Inspect variables and missing values:

I used tabulation with frequencies for categorical variables, and central values, quantiles and histograms for numerical variables:

CrossTable(COPD$COPDSEVERITY)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##             |        MILD |    MODERATE |      SEVERE | VERY SEVERE | 
##             |-------------|-------------|-------------|-------------|
##             |          23 |          43 |          27 |           8 | 
##             |       0.228 |       0.426 |       0.267 |       0.079 | 
##             |-------------|-------------|-------------|-------------|
## 
## 
## 
## 
sum(is.na(COPD$COPDSEVERITY))
## [1] 0
CrossTable(COPD$copd)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##           |         1 |         2 |         3 |         4 | 
##           |-----------|-----------|-----------|-----------|
##           |        23 |        43 |        27 |         8 | 
##           |     0.228 |     0.426 |     0.267 |     0.079 | 
##           |-----------|-----------|-----------|-----------|
## 
## 
## 
## 
sum(is.na(COPD$copd))
## [1] 0
CrossTable(COPD$gender)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |        36 |        65 | 
##           |     0.356 |     0.644 | 
##           |-----------|-----------|
## 
## 
## 
## 
sum(is.na(COPD$gender))
## [1] 0
summary(COPD$AGE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    44.0    65.0    71.0    70.1    75.0    88.0
summary(COPD$PackHistory)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0    20.0    36.0    39.7    54.0   109.0
summary(COPD$FEV1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.450   1.100   1.600   1.604   1.960   3.180
hist(COPD$FEV1)

summary(COPD$FVC)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.140   2.270   2.770   2.955   3.630   5.370
hist(COPD$FVC)

summary(COPD$FEV1PRED)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.29   42.00   60.00   58.53   75.00  102.00
hist(COPD$FEV1PRED)

summary(COPD$FVCPRED)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.00   71.00   84.00   86.44  103.00  132.00
hist(COPD$FVCPRED)

summary(COPD$CAT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   12.00   18.00   19.34   24.00  188.00
hist(COPD$CAT)

# CAT has one implausible value (maximum of 188, far from the rest of the
# distribution); it was apparently recorded wrongly, so it is set to missing.
COPD$CAT <- replace(COPD$CAT, which(COPD$CAT > 100), NA)

CrossTable(COPD$comorbid)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |        46 |        55 | 
##           |     0.455 |     0.545 | 
##           |-----------|-----------|
## 
## 
## 
## 

Examine collinearity:

# Numeric candidate predictors together with the outcome (SGRQ).
data <- COPD[, c("AGE", "PackHistory", "FEV1", "FEV1PRED", "FVC", "CAT", "HAD", "SGRQ")]
# CAT contains one NA (the implausible value set to missing earlier). cor()
# defaults to use = "everything", which makes every correlation involving CAT
# NA. Pairwise deletion drops that patient only from the pairs that include
# CAT, so all other correlations are unchanged.
cor_matrix <- cor(data, use = "pairwise.complete.obs")
round(cor_matrix, 2)
# NOTE(review): the matrix printed below was produced before `use` was set, so
# its CAT row/column still shows NA; re-knit the document to refresh it.
##               AGE PackHistory  FEV1 FEV1PRED   FVC CAT   HAD  SGRQ
## AGE          1.00        0.00 -0.10     0.07 -0.15  NA -0.23 -0.14
## PackHistory  0.00        1.00 -0.13    -0.13 -0.09  NA  0.03  0.03
## FEV1        -0.10       -0.13  1.00     0.78  0.82  NA -0.15 -0.30
## FEV1PRED     0.07       -0.13  0.78     1.00  0.52  NA -0.11 -0.33
## FVC         -0.15       -0.09  0.82     0.52  1.00  NA -0.13 -0.22
## CAT            NA          NA    NA       NA    NA   1    NA    NA
## HAD         -0.23        0.03 -0.15    -0.11 -0.13  NA  1.00  0.40
## SGRQ        -0.14        0.03 -0.30    -0.33 -0.22  NA  0.40  1.00
pairs(~AGE+PackHistory+FEV1+FEV1PRED+FVC+CAT+HAD+SGRQ, data=COPD)

CrossTable(COPD$gender, COPD$COPDSEVERITY)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##              | COPD$COPDSEVERITY 
##  COPD$gender |        MILD |    MODERATE |      SEVERE | VERY SEVERE |   Row Total | 
## -------------|-------------|-------------|-------------|-------------|-------------|
##            0 |           8 |          18 |           7 |           3 |          36 | 
##              |       0.005 |       0.466 |       0.715 |       0.008 |             | 
##              |       0.222 |       0.500 |       0.194 |       0.083 |       0.356 | 
##              |       0.348 |       0.419 |       0.259 |       0.375 |             | 
##              |       0.079 |       0.178 |       0.069 |       0.030 |             | 
## -------------|-------------|-------------|-------------|-------------|-------------|
##            1 |          15 |          25 |          20 |           5 |          65 | 
##              |       0.003 |       0.258 |       0.396 |       0.004 |             | 
##              |       0.231 |       0.385 |       0.308 |       0.077 |       0.644 | 
##              |       0.652 |       0.581 |       0.741 |       0.625 |             | 
##              |       0.149 |       0.248 |       0.198 |       0.050 |             | 
## -------------|-------------|-------------|-------------|-------------|-------------|
## Column Total |          23 |          43 |          27 |           8 |         101 | 
##              |       0.228 |       0.426 |       0.267 |       0.079 |             | 
## -------------|-------------|-------------|-------------|-------------|-------------|
## 
## 
CrossTable(COPD$gender, COPD$comorbid)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##              | COPD$comorbid 
##  COPD$gender |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        18 |        18 |        36 | 
##              |     0.157 |     0.131 |           | 
##              |     0.500 |     0.500 |     0.356 | 
##              |     0.391 |     0.327 |           | 
##              |     0.178 |     0.178 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        28 |        37 |        65 | 
##              |     0.087 |     0.073 |           | 
##              |     0.431 |     0.569 |     0.644 | 
##              |     0.609 |     0.673 |           | 
##              |     0.277 |     0.366 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |        46 |        55 |       101 | 
##              |     0.455 |     0.545 |           | 
## -------------|-----------|-----------|-----------|
## 
## 
CrossTable(COPD$COPDSEVERITY, COPD$comorbid)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  101 
## 
##  
##                   | COPD$comorbid 
## COPD$COPDSEVERITY |         0 |         1 | Row Total | 
## ------------------|-----------|-----------|-----------|
##              MILD |         9 |        14 |        23 | 
##                   |     0.208 |     0.174 |           | 
##                   |     0.391 |     0.609 |     0.228 | 
##                   |     0.196 |     0.255 |           | 
##                   |     0.089 |     0.139 |           | 
## ------------------|-----------|-----------|-----------|
##          MODERATE |        23 |        20 |        43 | 
##                   |     0.596 |     0.498 |           | 
##                   |     0.535 |     0.465 |     0.426 | 
##                   |     0.500 |     0.364 |           | 
##                   |     0.228 |     0.198 |           | 
## ------------------|-----------|-----------|-----------|
##            SEVERE |         9 |        18 |        27 | 
##                   |     0.884 |     0.739 |           | 
##                   |     0.333 |     0.667 |     0.267 | 
##                   |     0.196 |     0.327 |           | 
##                   |     0.089 |     0.178 |           | 
## ------------------|-----------|-----------|-----------|
##       VERY SEVERE |         5 |         3 |         8 | 
##                   |     0.505 |     0.422 |           | 
##                   |     0.625 |     0.375 |     0.079 | 
##                   |     0.109 |     0.055 |           | 
##                   |     0.050 |     0.030 |           | 
## ------------------|-----------|-----------|-----------|
##      Column Total |        46 |        55 |       101 | 
##                   |     0.455 |     0.545 |           | 
## ------------------|-----------|-----------|-----------|
## 
## 

Lung function variables are highly correlated, so we will choose only one of them. Let’s examine them.

Simple linear regressions:

FEV1lr <- lm(SGRQ ~ FEV1, data= COPD)
summary(FEV1lr)
## 
## Call:
## lm(formula = SGRQ ~ FEV1, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.857 -12.508  -2.007  12.141  36.127 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   53.378      4.512  11.830  < 2e-16 ***
## FEV1          -8.226      2.596  -3.168  0.00204 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.47 on 99 degrees of freedom
## Multiple R-squared:  0.09206,    Adjusted R-squared:  0.08288 
## F-statistic: 10.04 on 1 and 99 DF,  p-value: 0.00204
confint(FEV1lr)
##                 2.5 %    97.5 %
## (Intercept)  44.42496 62.330734
## FEV1        -13.37731 -3.074006
FEV1Plr <- lm(SGRQ ~ FEV1PRED, data= COPD)
summary(FEV1Plr)
## 
## Call:
## lm(formula = SGRQ ~ FEV1PRED, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.661 -11.842  -1.754  11.973  39.970 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 56.07360    4.85507   11.55   <2e-16 ***
## FEV1PRED    -0.27144    0.07756   -3.50    7e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.29 on 99 degrees of freedom
## Multiple R-squared:  0.1101, Adjusted R-squared:  0.1011 
## F-statistic: 12.25 on 1 and 99 DF,  p-value: 0.0007001
confint(FEV1Plr)
##                 2.5 %    97.5 %
## (Intercept) 46.440086 65.707115
## FEV1PRED    -0.425343 -0.117536
FVClr <- lm(SGRQ ~ FVC, data= COPD)
summary(FVClr)
## 
## Call:
## lm(formula = SGRQ ~ FVC, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.178 -12.438  -3.651  11.694  35.725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   52.336      5.697   9.186 6.52e-15 ***
## FVC           -4.112      1.832  -2.245    0.027 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.88 on 99 degrees of freedom
## Multiple R-squared:  0.04844,    Adjusted R-squared:  0.03883 
## F-statistic:  5.04 on 1 and 99 DF,  p-value: 0.027
confint(FVClr)
##                 2.5 %     97.5 %
## (Intercept) 41.031387 63.6406413
## FVC         -7.746106 -0.4774988
FVCPlr <- lm(SGRQ ~ FVCPRED, data= COPD)
summary(FVCPlr)
## 
## Call:
## lm(formula = SGRQ ~ FVCPRED, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.438 -12.645  -1.299  12.600  38.180 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 61.15614    7.19203   8.503 1.98e-13 ***
## FVCPRED     -0.24261    0.08072  -3.006  0.00336 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.55 on 99 degrees of freedom
## Multiple R-squared:  0.08362,    Adjusted R-squared:  0.07437 
## F-statistic: 9.034 on 1 and 99 DF,  p-value: 0.003358
confint(FVCPlr)
##                  2.5 %      97.5 %
## (Intercept) 46.8855875 75.42669647
## FVCPRED     -0.4027725 -0.08245099

I will choose FEV1PRED as the measure of lung function because it has the largest R-squared (0.11), which means it explains the most variation in the outcome.

Note: this is just a tutorial analysis. I do not know whether it is a valid variable medically. In real research I would search the literature.

AGElr <- lm(SGRQ ~ AGE, data= COPD)
summary(AGElr)
## 
## Call:
## lm(formula = SGRQ ~ AGE, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.827 -11.486  -0.834  13.365  36.257 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  62.7440    16.2106   3.871 0.000195 ***
## AGE          -0.3218     0.2298  -1.400 0.164552    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.15 on 99 degrees of freedom
## Multiple R-squared:  0.01942,    Adjusted R-squared:  0.009517 
## F-statistic: 1.961 on 1 and 99 DF,  p-value: 0.1646
confint(AGElr)
##                  2.5 %     97.5 %
## (Intercept) 30.5787348 94.9092336
## AGE         -0.7778013  0.1341933
AGEQlr <- lm(SGRQ ~ AGEquartiles, data= COPD)
summary(AGEQlr)
## 
## Call:
## lm(formula = SGRQ ~ AGEquartiles, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.024 -11.334  -1.297  12.867  41.030 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     44.024      3.583  12.287   <2e-16 ***
## AGEquartiles2   -7.614      5.172  -1.472    0.144    
## AGEquartiles3   -2.401      4.976  -0.483    0.631    
## AGEquartiles4   -5.987      5.230  -1.145    0.255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.27 on 97 degrees of freedom
## Multiple R-squared:  0.02673,    Adjusted R-squared:  -0.003369 
## F-statistic: 0.8881 on 3 and 97 DF,  p-value: 0.4502
confint(AGEQlr)
##                   2.5 %    97.5 %
## (Intercept)    36.91290 51.135565
## AGEquartiles2 -17.87856  2.650096
## AGEquartiles3 -12.27674  7.474707
## AGEquartiles4 -16.36698  4.392435
Genderlr <- lm(SGRQ ~ gender, data= COPD)
summary(Genderlr)
## 
## Call:
## lm(formula = SGRQ ~ gender, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.695 -12.285  -2.194  14.535  36.745 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  40.6953     3.0545  13.323   <2e-16 ***
## gender1      -0.7916     3.8076  -0.208    0.836    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.33 on 99 degrees of freedom
## Multiple R-squared:  0.0004364,  Adjusted R-squared:  -0.00966 
## F-statistic: 0.04322 on 1 and 99 DF,  p-value: 0.8357
confint(Genderlr)
##                 2.5 %    97.5 %
## (Intercept) 34.634436 46.756119
## gender1     -8.346628  6.763457
SEVlr <- lm(SGRQ ~ COPDSEVERITY, data= COPD)
summary(SEVlr)
## 
## Call:
## lm(formula = SGRQ ~ COPDSEVERITY, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.796 -12.458  -1.646  10.402  40.048 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               32.512      3.614   8.997 1.97e-14 ***
## COPDSEVERITYMODERATE       4.966      4.477   1.109  0.27014    
## COPDSEVERITYSEVERE        16.334      4.918   3.321  0.00126 ** 
## COPDSEVERITYVERY SEVERE   15.064      7.114   2.118  0.03677 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.33 on 97 degrees of freedom
## Multiple R-squared:  0.1242, Adjusted R-squared:  0.09711 
## F-statistic: 4.585 on 3 and 97 DF,  p-value: 0.004794
confint(SEVlr)
##                              2.5 %   97.5 %
## (Intercept)             25.3398259 39.68452
## COPDSEVERITYMODERATE    -3.9203519 13.85135
## COPDSEVERITYSEVERE       6.5734226 26.09408
## COPDSEVERITYVERY SEVERE  0.9452949 29.18286
CATlr <- lm(SGRQ ~ CAT, data= COPD)
summary(CATlr)
## 
## Call:
## lm(formula = SGRQ ~ CAT, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.651  -7.239  -0.742   9.931  26.674 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.3689     3.1171   3.326  0.00124 ** 
## CAT           1.6913     0.1614  10.478  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.65 on 98 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5284, Adjusted R-squared:  0.5236 
## F-statistic: 109.8 on 1 and 98 DF,  p-value: < 2.2e-16
confint(CATlr)
##                2.5 %    97.5 %
## (Intercept) 4.182998 16.554727
## CAT         1.370986  2.011613
copdlr <- lm(SGRQ ~ copd, data= COPD)
summary(copdlr)
## 
## Call:
## lm(formula = SGRQ ~ copd, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.796 -12.458  -1.646  10.402  40.048 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   32.512      3.614   8.997 1.97e-14 ***
## copd2          4.966      4.477   1.109  0.27014    
## copd3         16.334      4.918   3.321  0.00126 ** 
## copd4         15.064      7.114   2.118  0.03677 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.33 on 97 degrees of freedom
## Multiple R-squared:  0.1242, Adjusted R-squared:  0.09711 
## F-statistic: 4.585 on 3 and 97 DF,  p-value: 0.004794
confint(copdlr)
##                  2.5 %   97.5 %
## (Intercept) 25.3398259 39.68452
## copd2       -3.9203519 13.85135
## copd3        6.5734226 26.09408
## copd4        0.9452949 29.18286
comorbidlr <- lm(SGRQ ~ comorbid, data= COPD)
summary(comorbidlr)
## 
## Call:
## lm(formula = SGRQ ~ comorbid, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.129  -9.659  -3.403  13.431  38.637 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   37.863      2.684  14.106   <2e-16 ***
## comorbid1      4.266      3.637   1.173    0.244    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.21 on 99 degrees of freedom
## Multiple R-squared:  0.0137, Adjusted R-squared:  0.003738 
## F-statistic: 1.375 on 1 and 99 DF,  p-value: 0.2437
confint(comorbidlr)
##                 2.5 %   97.5 %
## (Intercept) 32.537002 43.18908
## comorbid1   -2.951949 11.48295
smoklr <- lm(SGRQ ~ smoking, data= COPD)
summary(smoklr)
## 
## Call:
## lm(formula = SGRQ ~ smoking, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.579 -13.239  -1.839  15.911  37.921 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   38.579      1.946  19.820   <2e-16 ***
## smoking1      10.141      4.890   2.074   0.0407 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.95 on 99 degrees of freedom
## Multiple R-squared:  0.04162,    Adjusted R-squared:  0.03194 
## F-statistic:   4.3 on 1 and 99 DF,  p-value: 0.04072
confint(smoklr)
##                  2.5 %   97.5 %
## (Intercept) 34.7171849 42.44164
## smoking1     0.4368634 19.84431
packlr <- lm(SGRQ ~ PackHistory, data= COPD)
summary(packlr)
## 
## Call:
## lm(formula = SGRQ ~ PackHistory, data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.716 -12.041  -2.084  15.323  37.008 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.23866    3.47775   11.28   <2e-16 ***
## PackHistory  0.02386    0.07460    0.32     0.75    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.32 on 99 degrees of freedom
## Multiple R-squared:  0.001032,   Adjusted R-squared:  -0.009058 
## F-statistic: 0.1023 on 1 and 99 DF,  p-value: 0.7498
confint(packlr)
##                  2.5 %     97.5 %
## (Intercept) 32.3380404 46.1392809
## PackHistory -0.1241701  0.1718894

Considered variables:

Patient characteristics: Age, smoking. Lung function: FEV1PRED. Severity: CAT. Comorbidity: comorbid.

Multiple Linear Regression

# Multivariable linear regression of SGRQ on the variables listed under
# "Considered variables": age, smoking status, FEV1PRED, CAT and the combined
# comorbidity indicator.
# The patient whose CAT value was set to NA is dropped from this fit (the
# summary reports one observation deleted due to missingness).
finallr <- lm(SGRQ ~ AGE + smoking + FEV1PRED + CAT + comorbid, data = COPD)
# Print the coefficient table, residual summary, R-squared and overall F-test.
summary(finallr)
## 
## Call:
## lm(formula = SGRQ ~ AGE + smoking + FEV1PRED + CAT + comorbid, 
##     data = COPD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.229  -6.991   0.081   9.911  23.918 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 30.28134   12.43221   2.436   0.0167 *  
## AGE         -0.18466    0.16045  -1.151   0.2527    
## smoking1     4.92285    3.49003   1.411   0.1617    
## FEV1PRED    -0.11454    0.05898  -1.942   0.0551 .  
## CAT          1.52824    0.16933   9.025 2.17e-14 ***
## comorbid1    3.28160    2.55055   1.287   0.2014    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.4 on 94 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5653, Adjusted R-squared:  0.5422 
## F-statistic: 24.45 on 5 and 94 DF,  p-value: 1.082e-15
confint(finallr)
##                  2.5 %      97.5 %
## (Intercept)  5.5969040 54.96577686
## AGE         -0.5032449  0.13392448
## smoking1    -2.0066865 11.85238804
## FEV1PRED    -0.2316475  0.00257057
## CAT          1.1920429  1.86444594
## comorbid1   -1.7825793  8.34578459
# Show the lm diagnostic plots for the final model in a 2 x 2 grid.
# par() returns the previous settings, which are restored afterwards so that
# later plots are not forced into the same four-panel layout.
old_par <- par(mfrow = c(2, 2))
plot(finallr)
par(old_par)

imcdiag(finallr)
## 
## Call:
## imcdiag(mod = finallr)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##              VIF    TOL     Wi     Fi Leamer   CVIF Klein   IND1   IND2
## AGE       1.0279 0.9728 0.6637 0.8942 0.9863 1.5603     0 0.0410 0.3824
## smoking1  1.0645 0.9394 1.5327 2.0650 0.9692 1.6159     0 0.0396 0.8528
## FEV1PRED  1.1018 0.9076 2.4183 3.2584 0.9527 1.6725     0 0.0382 1.3001
## CAT       1.1453 0.8731 3.4505 4.6491 0.9344 1.7384     0 0.0368 1.7846
## comorbid1 1.0508 0.9517 1.2066 1.6257 0.9755 1.5950     0 0.0401 0.6801
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## AGE , smoking1 , FEV1PRED , comorbid1 , coefficient(s) are non-significant may be due to multicollinearity
## 
## R-square of y on all x: 0.5653 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
#VIFs are all less than 4, indicating a low risk of collinearity.

Results:

In the multivariable model (n = 100, after excluding the patient with the implausible CAT value), each estimate is adjusted for the other predictors. The quality of life score (SGRQ) decreases (i.e. quality of life improves) by 0.18 (SE 0.16) for every 1-year increase in age, with a p-value of 0.25 (not significant). It is 4.92 (SE 3.49) points higher for current smokers than for ex-smokers, with a p-value of 0.16 (not significant). It decreases by 0.11 (SE 0.06) for every 1-unit increase in FEV1PRED, with a p-value of 0.055 (borderline). It increases by 1.53 (SE 0.17) for every 1-point increase in CAT, with a p-value < 0.001. It is 3.28 (SE 2.55) points higher for patients with one or more comorbidities, with a p-value of 0.20 (not significant).

Conclusion:

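The quality of life of COPD patients is affected by many variables. In the adjusted model, a lower CAT score is the only statistically significant factor associated with a better quality of life (lower SGRQ). A higher FEV1PRED shows a borderline association (p = 0.055). Smoking status is significant on its own in the simple regression (p = 0.04) but not after adjustment for the other variables.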