#load packages:
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.0.5
library(mctest)
# Read the COPD student dataset from CSV and print the structure of the
# resulting data frame.
COPD <- read.csv(
  file = "VYLgxg9YEemU7w7-EFnPcg_5645e2700f5811e9b2f4133a1edfbb40_COPD_student_dataset.csv"
)
str(object = COPD)
## 'data.frame': 101 obs. of 24 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ID : int 58 57 62 145 136 84 93 27 114 152 ...
## $ AGE : int 77 79 80 56 65 67 67 83 72 75 ...
## $ PackHistory : num 60 50 11 60 68 26 50 90 50 6 ...
## $ COPDSEVERITY: chr "SEVERE" "MODERATE" "MODERATE" "VERY SEVERE" ...
## $ MWT1 : int 120 165 201 210 204 216 214 214 231 226 ...
## $ MWT2 : int 120 176 180 210 210 180 237 237 237 240 ...
## $ MWT1Best : int 120 176 201 210 210 216 237 237 237 240 ...
## $ FEV1 : num 1.21 1.09 1.52 0.47 1.07 1.09 0.69 0.68 2.13 1.06 ...
## $ FEV1PRED : num 36 56 68 14 42 50 35 32 63 46 ...
## $ FVC : num 2.4 1.64 2.3 1.14 2.91 1.99 1.31 2.23 4.38 2.06 ...
## $ FVCPRED : int 98 65 86 27 98 60 48 77 80 75 ...
## $ CAT : int 25 12 22 28 32 29 29 22 25 31 ...
## $ HAD : num 8 21 18 26 18 21 30 2 6 20 ...
## $ SGRQ : num 69.5 44.2 44.1 62 75.6 ...
## $ AGEquartiles: int 4 4 4 1 1 2 2 4 3 3 ...
## $ copd : int 3 2 2 4 3 2 3 3 2 3 ...
## $ gender : int 1 0 0 1 1 0 0 1 1 0 ...
## $ smoking : int 2 2 2 2 2 1 1 2 1 2 ...
## $ Diabetes : int 1 1 1 0 0 1 1 1 1 0 ...
## $ muscular : int 0 0 0 0 1 0 0 0 0 1 ...
## $ hypertension: int 0 0 0 1 1 0 0 0 0 0 ...
## $ AtrialFib : int 1 1 1 1 0 1 1 1 1 0 ...
## $ IHD : int 0 1 0 0 0 0 0 0 0 0 ...
# Convert the categorical variables to factors in a single step.
COPD[c("COPDSEVERITY", "AGEquartiles", "copd", "gender")] <-
  lapply(COPD[c("COPDSEVERITY", "AGEquartiles", "copd", "gender")], as.factor)
# Recode smoking from 1/2 to 0/1 (0: ex-smoker, 1: smoker) by mapping the
# value 2 to 0, then store it as a factor.
COPD$smoking <- factor(replace(COPD$smoking, COPD$smoking == 2, 0))
# Create the variable "comorbid" as a single substitute for the individual
# comorbidity variables (0: no comorbidities, 1: one or more comorbidities).
#
# Start from an explicit all-zero vector with one entry per patient. The
# previous initialisation, length(COPD$Diabetes), produced the scalar 101
# rather than a vector, so the first element kept the value 101 unless the
# first patient happened to have a comorbidity.
comorbid <- rep(0, nrow(COPD))
# Flag patients with at least one recorded comorbidity. Rows where the
# condition evaluates to NA are not assigned and therefore stay 0, which
# matches the earlier behaviour of recoding NA to 0.
comorbid[COPD$Diabetes == 1 | COPD$hypertension == 1 | COPD$AtrialFib == 1 |
           COPD$IHD == 1 | COPD$muscular == 1] <- 1
comorbid <- factor(comorbid)
COPD$comorbid <- comorbid
This is a dataset of 101 COPD patients. Outcome: quality of life (SGRQ). Purpose of the model: to understand the variables affecting the quality of life of COPD patients. Candidate predictors: patient characteristics (age, gender, smoking, pack history), lung function (FEV1, FEV1PRED, FVC, FVCPRED), severity (COPDSEVERITY, CAT), and comorbidities.
I used frequency tables for the categorical variables, and measures of central tendency, quantiles and histograms for the numerical variables:
# Frequency table of the COPD severity categories.
CrossTable(x = COPD$COPDSEVERITY)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | MILD | MODERATE | SEVERE | VERY SEVERE |
## |-------------|-------------|-------------|-------------|
## | 23 | 43 | 27 | 8 |
## | 0.228 | 0.426 | 0.267 | 0.079 |
## |-------------|-------------|-------------|-------------|
##
##
##
##
# Number of missing values in COPDSEVERITY.
sum(is.na(COPD[["COPDSEVERITY"]]))
## [1] 0
# Frequency table of the numerically coded severity variable.
CrossTable(x = COPD$copd)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | 1 | 2 | 3 | 4 |
## |-----------|-----------|-----------|-----------|
## | 23 | 43 | 27 | 8 |
## | 0.228 | 0.426 | 0.267 | 0.079 |
## |-----------|-----------|-----------|-----------|
##
##
##
##
# Number of missing values in copd.
sum(is.na(COPD[["copd"]]))
## [1] 0
# Frequency table of gender.
CrossTable(x = COPD$gender)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | 0 | 1 |
## |-----------|-----------|
## | 36 | 65 |
## | 0.356 | 0.644 |
## |-----------|-----------|
##
##
##
##
# Number of missing values in gender.
sum(is.na(COPD[["gender"]]))
## [1] 0
# Six-number summary of age.
summary(object = COPD$AGE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 44.0 65.0 71.0 70.1 75.0 88.0
# Six-number summary of pack history.
summary(object = COPD$PackHistory)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 20.0 36.0 39.7 54.0 109.0
# Six-number summary of FEV1.
summary(object = COPD$FEV1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.450 1.100 1.600 1.604 1.960 3.180
# Distribution of FEV1, followed by the six-number summary of FVC.
hist(x = COPD$FEV1)
summary(object = COPD$FVC)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.140 2.270 2.770 2.955 3.630 5.370
# Distribution of FVC, followed by the six-number summary of FEV1PRED.
hist(x = COPD$FVC)
summary(object = COPD$FEV1PRED)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.29 42.00 60.00 58.53 75.00 102.00
# Distribution of FEV1PRED, followed by the six-number summary of FVCPRED.
hist(x = COPD$FEV1PRED)
summary(object = COPD$FVCPRED)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27.00 71.00 84.00 86.44 103.00 132.00
# Distribution of FVCPRED, followed by the six-number summary of CAT.
hist(x = COPD$FVCPRED)
summary(object = COPD$CAT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 12.00 18.00 19.34 24.00 188.00
hist(x = COPD$CAT)
# The maximum CAT value (188) lies far outside the rest of the distribution and
# was apparently recorded wrongly, so any CAT value above 100 is set to missing.
COPD$CAT <- replace(COPD$CAT, COPD$CAT > 100, NA)
# Frequency table of the derived comorbidity indicator.
CrossTable(x = COPD$comorbid)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | 0 | 1 |
## |-----------|-----------|
## | 46 | 55 |
## | 0.455 | 0.545 |
## |-----------|-----------|
##
##
##
##
# Pearson correlation matrix of the continuous variables, rounded to 2 decimals.
# CAT contains a missing value (the outlier was set to NA), and cor() with its
# default use = "everything" returns NA for every pair involving a variable
# with missing data. Pairwise-complete observations give usable CAT
# correlations; re-knit the document to refresh the printed matrix.
data <- COPD[, c("AGE", "PackHistory", "FEV1", "FEV1PRED", "FVC", "CAT", "HAD", "SGRQ")]
cor_matrix <- cor(data, use = "pairwise.complete.obs")
round(cor_matrix, 2)
## AGE PackHistory FEV1 FEV1PRED FVC CAT HAD SGRQ
## AGE 1.00 0.00 -0.10 0.07 -0.15 NA -0.23 -0.14
## PackHistory 0.00 1.00 -0.13 -0.13 -0.09 NA 0.03 0.03
## FEV1 -0.10 -0.13 1.00 0.78 0.82 NA -0.15 -0.30
## FEV1PRED 0.07 -0.13 0.78 1.00 0.52 NA -0.11 -0.33
## FVC -0.15 -0.09 0.82 0.52 1.00 NA -0.13 -0.22
## CAT NA NA NA NA NA 1 NA NA
## HAD -0.23 0.03 -0.15 -0.11 -0.13 NA 1.00 0.40
## SGRQ -0.14 0.03 -0.30 -0.33 -0.22 NA 0.40 1.00
# Scatterplot matrix of the continuous variables.
pairs(
  ~ AGE + PackHistory + FEV1 + FEV1PRED + FVC + CAT + HAD + SGRQ,
  data = COPD
)
# Two-way table: gender by COPD severity.
CrossTable(x = COPD$gender, y = COPD$COPDSEVERITY)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | COPD$COPDSEVERITY
## COPD$gender | MILD | MODERATE | SEVERE | VERY SEVERE | Row Total |
## -------------|-------------|-------------|-------------|-------------|-------------|
## 0 | 8 | 18 | 7 | 3 | 36 |
## | 0.005 | 0.466 | 0.715 | 0.008 | |
## | 0.222 | 0.500 | 0.194 | 0.083 | 0.356 |
## | 0.348 | 0.419 | 0.259 | 0.375 | |
## | 0.079 | 0.178 | 0.069 | 0.030 | |
## -------------|-------------|-------------|-------------|-------------|-------------|
## 1 | 15 | 25 | 20 | 5 | 65 |
## | 0.003 | 0.258 | 0.396 | 0.004 | |
## | 0.231 | 0.385 | 0.308 | 0.077 | 0.644 |
## | 0.652 | 0.581 | 0.741 | 0.625 | |
## | 0.149 | 0.248 | 0.198 | 0.050 | |
## -------------|-------------|-------------|-------------|-------------|-------------|
## Column Total | 23 | 43 | 27 | 8 | 101 |
## | 0.228 | 0.426 | 0.267 | 0.079 | |
## -------------|-------------|-------------|-------------|-------------|-------------|
##
##
# Two-way table: gender by comorbidity indicator.
CrossTable(x = COPD$gender, y = COPD$comorbid)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | COPD$comorbid
## COPD$gender | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 18 | 18 | 36 |
## | 0.157 | 0.131 | |
## | 0.500 | 0.500 | 0.356 |
## | 0.391 | 0.327 | |
## | 0.178 | 0.178 | |
## -------------|-----------|-----------|-----------|
## 1 | 28 | 37 | 65 |
## | 0.087 | 0.073 | |
## | 0.431 | 0.569 | 0.644 |
## | 0.609 | 0.673 | |
## | 0.277 | 0.366 | |
## -------------|-----------|-----------|-----------|
## Column Total | 46 | 55 | 101 |
## | 0.455 | 0.545 | |
## -------------|-----------|-----------|-----------|
##
##
# Two-way table: COPD severity by comorbidity indicator.
CrossTable(x = COPD$COPDSEVERITY, y = COPD$comorbid)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 101
##
##
## | COPD$comorbid
## COPD$COPDSEVERITY | 0 | 1 | Row Total |
## ------------------|-----------|-----------|-----------|
## MILD | 9 | 14 | 23 |
## | 0.208 | 0.174 | |
## | 0.391 | 0.609 | 0.228 |
## | 0.196 | 0.255 | |
## | 0.089 | 0.139 | |
## ------------------|-----------|-----------|-----------|
## MODERATE | 23 | 20 | 43 |
## | 0.596 | 0.498 | |
## | 0.535 | 0.465 | 0.426 |
## | 0.500 | 0.364 | |
## | 0.228 | 0.198 | |
## ------------------|-----------|-----------|-----------|
## SEVERE | 9 | 18 | 27 |
## | 0.884 | 0.739 | |
## | 0.333 | 0.667 | 0.267 |
## | 0.196 | 0.327 | |
## | 0.089 | 0.178 | |
## ------------------|-----------|-----------|-----------|
## VERY SEVERE | 5 | 3 | 8 |
## | 0.505 | 0.422 | |
## | 0.625 | 0.375 | 0.079 |
## | 0.109 | 0.055 | |
## | 0.050 | 0.030 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 46 | 55 | 101 |
## | 0.455 | 0.545 | |
## ------------------|-----------|-----------|-----------|
##
##
The lung function variables are highly correlated with each other, so we will keep only one of them. Let’s examine each in turn.
# Univariable linear regression of SGRQ on FEV1.
FEV1lr <- lm(formula = SGRQ ~ FEV1, data = COPD)
summary(object = FEV1lr)
##
## Call:
## lm(formula = SGRQ ~ FEV1, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.857 -12.508 -2.007 12.141 36.127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.378 4.512 11.830 < 2e-16 ***
## FEV1 -8.226 2.596 -3.168 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.47 on 99 degrees of freedom
## Multiple R-squared: 0.09206, Adjusted R-squared: 0.08288
## F-statistic: 10.04 on 1 and 99 DF, p-value: 0.00204
# 95% confidence intervals for the FEV1 model coefficients.
confint(FEV1lr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 44.42496 62.330734
## FEV1 -13.37731 -3.074006
# Univariable linear regression of SGRQ on FEV1PRED.
FEV1Plr <- lm(formula = SGRQ ~ FEV1PRED, data = COPD)
summary(object = FEV1Plr)
##
## Call:
## lm(formula = SGRQ ~ FEV1PRED, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.661 -11.842 -1.754 11.973 39.970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.07360 4.85507 11.55 <2e-16 ***
## FEV1PRED -0.27144 0.07756 -3.50 7e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.29 on 99 degrees of freedom
## Multiple R-squared: 0.1101, Adjusted R-squared: 0.1011
## F-statistic: 12.25 on 1 and 99 DF, p-value: 0.0007001
# 95% confidence intervals for the FEV1PRED model coefficients.
confint(FEV1Plr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 46.440086 65.707115
## FEV1PRED -0.425343 -0.117536
# Univariable linear regression of SGRQ on FVC.
FVClr <- lm(formula = SGRQ ~ FVC, data = COPD)
summary(object = FVClr)
##
## Call:
## lm(formula = SGRQ ~ FVC, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.178 -12.438 -3.651 11.694 35.725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.336 5.697 9.186 6.52e-15 ***
## FVC -4.112 1.832 -2.245 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.88 on 99 degrees of freedom
## Multiple R-squared: 0.04844, Adjusted R-squared: 0.03883
## F-statistic: 5.04 on 1 and 99 DF, p-value: 0.027
# 95% confidence intervals for the FVC model coefficients.
confint(FVClr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 41.031387 63.6406413
## FVC -7.746106 -0.4774988
# Univariable linear regression of SGRQ on FVCPRED.
FVCPlr <- lm(formula = SGRQ ~ FVCPRED, data = COPD)
summary(object = FVCPlr)
##
## Call:
## lm(formula = SGRQ ~ FVCPRED, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.438 -12.645 -1.299 12.600 38.180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.15614 7.19203 8.503 1.98e-13 ***
## FVCPRED -0.24261 0.08072 -3.006 0.00336 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.55 on 99 degrees of freedom
## Multiple R-squared: 0.08362, Adjusted R-squared: 0.07437
## F-statistic: 9.034 on 1 and 99 DF, p-value: 0.003358
# 95% confidence intervals for the FVCPRED model coefficients.
confint(FVCPlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 46.8855875 75.42669647
## FVCPRED -0.4027725 -0.08245099
I will choose FEV1PRED as the measure of lung function because it has the largest R-squared of the four, meaning it explains the most variation in the outcome.
Note: this is just a tutorial analysis. I do not know whether it is a valid choice of variable medically; in real research I would consult the literature.
# Univariable linear regression of SGRQ on age (continuous).
AGElr <- lm(formula = SGRQ ~ AGE, data = COPD)
summary(object = AGElr)
##
## Call:
## lm(formula = SGRQ ~ AGE, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.827 -11.486 -0.834 13.365 36.257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.7440 16.2106 3.871 0.000195 ***
## AGE -0.3218 0.2298 -1.400 0.164552
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.15 on 99 degrees of freedom
## Multiple R-squared: 0.01942, Adjusted R-squared: 0.009517
## F-statistic: 1.961 on 1 and 99 DF, p-value: 0.1646
# 95% confidence intervals for the age model coefficients.
confint(AGElr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 30.5787348 94.9092336
## AGE -0.7778013 0.1341933
# Univariable linear regression of SGRQ on age quartile.
AGEQlr <- lm(formula = SGRQ ~ AGEquartiles, data = COPD)
summary(object = AGEQlr)
##
## Call:
## lm(formula = SGRQ ~ AGEquartiles, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.024 -11.334 -1.297 12.867 41.030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.024 3.583 12.287 <2e-16 ***
## AGEquartiles2 -7.614 5.172 -1.472 0.144
## AGEquartiles3 -2.401 4.976 -0.483 0.631
## AGEquartiles4 -5.987 5.230 -1.145 0.255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.27 on 97 degrees of freedom
## Multiple R-squared: 0.02673, Adjusted R-squared: -0.003369
## F-statistic: 0.8881 on 3 and 97 DF, p-value: 0.4502
# 95% confidence intervals for the age-quartile model coefficients.
confint(AGEQlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 36.91290 51.135565
## AGEquartiles2 -17.87856 2.650096
## AGEquartiles3 -12.27674 7.474707
## AGEquartiles4 -16.36698 4.392435
# Univariable linear regression of SGRQ on gender.
Genderlr <- lm(formula = SGRQ ~ gender, data = COPD)
summary(object = Genderlr)
##
## Call:
## lm(formula = SGRQ ~ gender, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.695 -12.285 -2.194 14.535 36.745
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.6953 3.0545 13.323 <2e-16 ***
## gender1 -0.7916 3.8076 -0.208 0.836
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.33 on 99 degrees of freedom
## Multiple R-squared: 0.0004364, Adjusted R-squared: -0.00966
## F-statistic: 0.04322 on 1 and 99 DF, p-value: 0.8357
# 95% confidence intervals for the gender model coefficients.
confint(Genderlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 34.634436 46.756119
## gender1 -8.346628 6.763457
# Univariable linear regression of SGRQ on COPD severity category.
SEVlr <- lm(formula = SGRQ ~ COPDSEVERITY, data = COPD)
summary(object = SEVlr)
##
## Call:
## lm(formula = SGRQ ~ COPDSEVERITY, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.796 -12.458 -1.646 10.402 40.048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.512 3.614 8.997 1.97e-14 ***
## COPDSEVERITYMODERATE 4.966 4.477 1.109 0.27014
## COPDSEVERITYSEVERE 16.334 4.918 3.321 0.00126 **
## COPDSEVERITYVERY SEVERE 15.064 7.114 2.118 0.03677 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.33 on 97 degrees of freedom
## Multiple R-squared: 0.1242, Adjusted R-squared: 0.09711
## F-statistic: 4.585 on 3 and 97 DF, p-value: 0.004794
# 95% confidence intervals for the severity model coefficients.
confint(SEVlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 25.3398259 39.68452
## COPDSEVERITYMODERATE -3.9203519 13.85135
## COPDSEVERITYSEVERE 6.5734226 26.09408
## COPDSEVERITYVERY SEVERE 0.9452949 29.18286
# Univariable linear regression of SGRQ on CAT score.
CATlr <- lm(formula = SGRQ ~ CAT, data = COPD)
summary(object = CATlr)
##
## Call:
## lm(formula = SGRQ ~ CAT, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.651 -7.239 -0.742 9.931 26.674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.3689 3.1171 3.326 0.00124 **
## CAT 1.6913 0.1614 10.478 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.65 on 98 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5284, Adjusted R-squared: 0.5236
## F-statistic: 109.8 on 1 and 98 DF, p-value: < 2.2e-16
# 95% confidence intervals for the CAT model coefficients.
confint(CATlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 4.182998 16.554727
## CAT 1.370986 2.011613
# Univariable linear regression of SGRQ on the numerically coded severity.
copdlr <- lm(formula = SGRQ ~ copd, data = COPD)
summary(object = copdlr)
##
## Call:
## lm(formula = SGRQ ~ copd, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.796 -12.458 -1.646 10.402 40.048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.512 3.614 8.997 1.97e-14 ***
## copd2 4.966 4.477 1.109 0.27014
## copd3 16.334 4.918 3.321 0.00126 **
## copd4 15.064 7.114 2.118 0.03677 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.33 on 97 degrees of freedom
## Multiple R-squared: 0.1242, Adjusted R-squared: 0.09711
## F-statistic: 4.585 on 3 and 97 DF, p-value: 0.004794
# 95% confidence intervals for the copd model coefficients.
confint(copdlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 25.3398259 39.68452
## copd2 -3.9203519 13.85135
## copd3 6.5734226 26.09408
## copd4 0.9452949 29.18286
# Univariable linear regression of SGRQ on the comorbidity indicator.
comorbidlr <- lm(formula = SGRQ ~ comorbid, data = COPD)
summary(object = comorbidlr)
##
## Call:
## lm(formula = SGRQ ~ comorbid, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.129 -9.659 -3.403 13.431 38.637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.863 2.684 14.106 <2e-16 ***
## comorbid1 4.266 3.637 1.173 0.244
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.21 on 99 degrees of freedom
## Multiple R-squared: 0.0137, Adjusted R-squared: 0.003738
## F-statistic: 1.375 on 1 and 99 DF, p-value: 0.2437
# 95% confidence intervals for the comorbidity model coefficients.
confint(comorbidlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 32.537002 43.18908
## comorbid1 -2.951949 11.48295
# Univariable linear regression of SGRQ on smoking status.
smoklr <- lm(formula = SGRQ ~ smoking, data = COPD)
summary(object = smoklr)
##
## Call:
## lm(formula = SGRQ ~ smoking, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.579 -13.239 -1.839 15.911 37.921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.579 1.946 19.820 <2e-16 ***
## smoking1 10.141 4.890 2.074 0.0407 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.95 on 99 degrees of freedom
## Multiple R-squared: 0.04162, Adjusted R-squared: 0.03194
## F-statistic: 4.3 on 1 and 99 DF, p-value: 0.04072
# 95% confidence intervals for the smoking model coefficients.
confint(smoklr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 34.7171849 42.44164
## smoking1 0.4368634 19.84431
# Univariable linear regression of SGRQ on pack history.
packlr <- lm(formula = SGRQ ~ PackHistory, data = COPD)
summary(object = packlr)
##
## Call:
## lm(formula = SGRQ ~ PackHistory, data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.716 -12.041 -2.084 15.323 37.008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.23866 3.47775 11.28 <2e-16 ***
## PackHistory 0.02386 0.07460 0.32 0.75
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.32 on 99 degrees of freedom
## Multiple R-squared: 0.001032, Adjusted R-squared: -0.009058
## F-statistic: 0.1023 on 1 and 99 DF, p-value: 0.7498
# 95% confidence intervals for the pack-history model coefficients.
confint(packlr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 32.3380404 46.1392809
## PackHistory -0.1241701 0.1718894
Predictors selected for the final model: patient characteristics (age, smoking); lung function (FEV1PRED); severity (CAT); comorbidities (comorbid).
# Multivariable linear regression of SGRQ on the selected predictors.
finallr <- lm(
  formula = SGRQ ~ AGE + smoking + FEV1PRED + CAT + comorbid,
  data = COPD
)
summary(object = finallr)
##
## Call:
## lm(formula = SGRQ ~ AGE + smoking + FEV1PRED + CAT + comorbid,
## data = COPD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47.229 -6.991 0.081 9.911 23.918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.28134 12.43221 2.436 0.0167 *
## AGE -0.18466 0.16045 -1.151 0.2527
## smoking1 4.92285 3.49003 1.411 0.1617
## FEV1PRED -0.11454 0.05898 -1.942 0.0551 .
## CAT 1.52824 0.16933 9.025 2.17e-14 ***
## comorbid1 3.28160 2.55055 1.287 0.2014
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.4 on 94 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5653, Adjusted R-squared: 0.5422
## F-statistic: 24.45 on 5 and 94 DF, p-value: 1.082e-15
# 95% confidence intervals for the final model coefficients.
confint(finallr, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 5.5969040 54.96577686
## AGE -0.5032449 0.13392448
## smoking1 -2.0066865 11.85238804
## FEV1PRED -0.2316475 0.00257057
## CAT 1.1920429 1.86444594
## comorbid1 -1.7825793 8.34578459
# Arrange plots in a 2 x 2 grid and draw the diagnostic plots of the final model.
par(mfrow = c(2, 2))
plot(x = finallr)
# Individual multicollinearity diagnostics (VIF, tolerance, ...) per regressor.
imcdiag(mod = finallr)
##
## Call:
## imcdiag(mod = finallr)
##
##
## All Individual Multicollinearity Diagnostics Result
##
## VIF TOL Wi Fi Leamer CVIF Klein IND1 IND2
## AGE 1.0279 0.9728 0.6637 0.8942 0.9863 1.5603 0 0.0410 0.3824
## smoking1 1.0645 0.9394 1.5327 2.0650 0.9692 1.6159 0 0.0396 0.8528
## FEV1PRED 1.1018 0.9076 2.4183 3.2584 0.9527 1.6725 0 0.0382 1.3001
## CAT 1.1453 0.8731 3.4505 4.6491 0.9344 1.7384 0 0.0368 1.7846
## comorbid1 1.0508 0.9517 1.2066 1.6257 0.9755 1.5950 0 0.0401 0.6801
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
##
## AGE , smoking1 , FEV1PRED , comorbid1 , coefficient(s) are non-significant may be due to multicollinearity
##
## R-square of y on all x: 0.5653
##
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
#VIFs are less than 4, indicating a low risk of collinearity.
In the final model (estimate ± standard error), the SGRQ score decreases (i.e. quality of life improves) by 0.18 (± 0.16) for every 1-year increase in age, with a p-value of 0.25 (non-significant). It is higher for current smokers by 4.92 (± 3.49), with a p-value of 0.16 (non-significant). It decreases by 0.11 (± 0.06) for every 1-unit increase in FEV1PRED, with a p-value of 0.055 (borderline). It increases by 1.53 (± 0.17) for every 1-point increase in CAT, with a p-value < 0.001. It is higher for patients with comorbidities by 3.28 (± 2.55), with a p-value of 0.20 (non-significant).
The quality of life of COPD patients is affected by many variables. In this adjusted model, a lower CAT score is the only statistically significant factor associated with better quality of life; a higher FEV1PRED shows a borderline association (p = 0.055). Smoking status was significant in the univariable analysis (p = 0.04) but not after adjustment for the other predictors.