R Notebook

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# call packages ‘glmtoolbox’ and ‘aod’
library(glmtoolbox)
library(aod)

# import data (Patient_data.csv) as a data frame.
Patient_data<-as.data.frame(read.csv(file.choose()))

# understand the structure of the data and the nature of elements
str(Patient_data)

'data.frame':   14 obs. of  8 variables:
 $ male      : int  1 0 1 0 0 0 0 0 1 1 ...
 $ age       : int  39 46 48 61 46 43 63 45 52 43 ...
 $ education : int  4 2 1 3 3 2 1 2 1 1 ...
 $ Cholestrol: int  195 250 245 225 285 228 205 313 260 225 ...
 $ BMI       : num  27 28.7 25.3 28.6 23.1 ...
 $ heartRate : int  80 95 75 65 85 77 60 79 76 93 ...
 $ glucose   : int  77 76 70 103 85 99 85 78 79 88 ...
 $ CHD       : int  0 0 0 1 1 1 0 1 0 0 ...

# print the first five rows of the data 
head(Patient_data,5)

NA

# print the dimension of the data frame 
dim(Patient_data)

[1] 14  8

# print the standard deviation of the variables. 
sapply(Patient_data,sd)

      male        age  education Cholestrol        BMI  heartRate    glucose        CHD 
 0.4972452  7.0023544  0.9972490 39.7566775  3.3715064 11.3962111 11.7054040  0.4972452

# generating cross-tabulations
table(Patient_data$CHD,Patient_data$male,Patient_data$education)

# check for missing values
sum(is.na(Patient_data))

[1] 0

# convert the variables ‘CHD’ ,'male',and ‘Education’ to factors
Patient_data$CHD<-factor(Patient_data$CHD)
Patient_data$male<-factor(Patient_data$male)
Patient_data$education<-factor(Patient_data$education, levels=c(1,2,3,4), ordered=T)

attach(Patient_data)

# have a look at the changes in the data after being converted
str(Patient_data)

'data.frame':   14 obs. of  8 variables:
 $ male      : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 2 2 ...
 $ age       : int  39 46 48 61 46 43 63 45 52 43 ...
 $ education : Ord.factor w/ 4 levels "1"<"2"<"3"<"4": 4 2 1 3 3 2 1 2 1 1 ...
 $ Cholestrol: int  195 250 245 225 285 228 205 313 260 225 ...
 $ BMI       : num  27 28.7 25.3 28.6 23.1 ...
 $ heartRate : int  80 95 75 65 85 77 60 79 76 93 ...
 $ glucose   : int  77 76 70 103 85 99 85 78 79 88 ...
 $ CHD       : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 1 2 1 1 ...

# print the summary of the data after the changes
summary(Patient_data)

 male       age        education   Cholestrol         BMI          heartRate        glucose      
 0:9   Min.   :39.00   1:6       Min.   :195.0   Min.   :21.68   Min.   :60.00   Min.   : 61.00  
 1:5   1st Qu.:43.00   2:4       1st Qu.:225.8   1st Qu.:24.04   1st Qu.:72.75   1st Qu.: 76.00  
       Median :46.00   3:3       Median :248.5   Median :26.66   Median :76.50   Median : 78.50  
       Mean   :47.57   4:1       Mean   :254.1   Mean   :26.85   Mean   :78.21   Mean   : 80.36  
       3rd Qu.:49.50             3rd Qu.:278.8   3rd Qu.:28.69   3rd Qu.:83.75   3rd Qu.: 85.00  
       Max.   :63.00             Max.   :332.0   Max.   :33.11   Max.   :98.00   Max.   :103.00  
 CHD  
 0:9  
 1:5

# implement binary logistic regression in R
CHD_Model<-glm(CHD~male+age+education+Cholestrol+BMI+heartRate+glucose, data=Patient_data, family ="binomial")

Warning: glm.fit: algorithm did not convergeWarning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(CHD_Model)


Call:
glm(formula = CHD ~ male + age + education + Cholestrol + BMI + 
    heartRate + glucose, family = "binomial", data = Patient_data)

Deviance Residuals: 
         1           2           3           4           5           6           7           8  
-2.409e-06  -1.329e-05  -2.110e-08   2.808e-06   1.177e-05   1.394e-05  -2.110e-08   2.110e-08  
         9          10          11          12          13          14  
-9.001e-06  -1.276e-05  -4.032e-06  -2.110e-08   1.279e-05  -1.096e-05  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.766e+03  4.845e+06  -0.001    0.999
male1       -1.051e+02  3.319e+05   0.000    1.000
age          1.384e+01  1.969e+04   0.001    0.999
education.L  2.906e+02  4.359e+05   0.001    0.999
education.Q  4.332e+02  6.666e+05   0.001    0.999
education.C  2.383e+02  3.434e+05   0.001    0.999
Cholestrol   4.960e+00  5.903e+03   0.001    0.999
BMI         -1.955e+00  3.617e+04   0.000    1.000
heartRate    8.953e+00  1.369e+04   0.001    0.999
glucose      1.570e+01  1.883e+04   0.001    0.999

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1.8249e+01  on 13  degrees of freedom
Residual deviance: 1.0672e-09  on  4  degrees of freedom
AIC: 20

Number of Fisher Scoring iterations: 25


#solving the"glm.fit: algorithm did not converge Warning" by Use the predictor variable to perfectly predict the response variable.
predict(CHD_Model, newpatient_data=data.frame(CHD=c(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0)))

         1          2          3          4          5          6          7          8          9 
 -26.56607  -23.15016 -301.94281   26.25913   23.39265   23.05423 -101.55052  177.39206  -23.92949 
        10         11         12         13         14 
 -23.23124  -25.53545 -518.83168   23.22641  -23.53565

summary(CHD_Model)


Call:
glm(formula = CHD ~ male + age + education + Cholestrol + BMI + 
    heartRate + glucose, family = "binomial", data = Patient_data)

Deviance Residuals: 
         1           2           3           4           5           6           7           8  
-2.409e-06  -1.329e-05  -2.110e-08   2.808e-06   1.177e-05   1.394e-05  -2.110e-08   2.110e-08  
         9          10          11          12          13          14  
-9.001e-06  -1.276e-05  -4.032e-06  -2.110e-08   1.279e-05  -1.096e-05  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.766e+03  4.845e+06  -0.001    0.999
male1       -1.051e+02  3.319e+05   0.000    1.000
age          1.384e+01  1.969e+04   0.001    0.999
education.L  2.906e+02  4.359e+05   0.001    0.999
education.Q  4.332e+02  6.666e+05   0.001    0.999
education.C  2.383e+02  3.434e+05   0.001    0.999
Cholestrol   4.960e+00  5.903e+03   0.001    0.999
BMI         -1.955e+00  3.617e+04   0.000    1.000
heartRate    8.953e+00  1.369e+04   0.001    0.999
glucose      1.570e+01  1.883e+04   0.001    0.999

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1.8249e+01  on 13  degrees of freedom
Residual deviance: 1.0672e-09  on  4  degrees of freedom
AIC: 20

Number of Fisher Scoring iterations: 25

#The Hosmer-Lemeshow goodness-of-fit test
hltest(CHD_Model)


   The Hosmer-Lemeshow goodness-of-fit test


         Statistic =  0 
degrees of freedom =  10 
           p-value =  1

# Condidence of interval for the model
confint(CHD_Model)

Waiting for profiling to be done...
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurred

                   2.5 %      97.5 %
(Intercept) -276701.2683 287072.2245
male1        -20255.7158  20782.0272
age           -1394.2434   1494.8948
education.L  -35256.1926  35837.3954
education.Q  -43948.3807  44814.8400
education.C  -22623.2565  23099.9395
Cholestrol     -388.0470    397.9662
BMI           -2863.8394   3270.8770
heartRate      -970.0128   1038.6482
glucose       -1127.7301   1200.9251

# Condidence of interval for the model using default
confint.default(CHD_Model)

                  2.5 %     97.5 %
(Intercept) -9499955.10 9492423.90
male1        -650668.00  650457.71
age           -38585.92   38613.60
education.L  -854133.74  854714.95
education.Q -1306104.22 1306970.68
education.C  -672777.66  673254.34
Cholestrol    -11564.65   11574.57
BMI           -70899.02   70895.11
heartRate     -26827.37   26845.27
glucose       -36899.99   36931.39

# Test for an overall effect of education using the wald.test().
wald.test(b = coef(CHD_Model), Sigma = vcov(CHD_Model), Terms = 4:6)

Wald test:
----------

Chi-squared test:
X2 = 5.7e-07, df = 3, P(> X2) = 1.0

# Test for an overall effect of male using the wald.test(). 
wald.test(b = coef(CHD_Model), Sigma = vcov(CHD_Model), Terms = 2)

Wald test:
----------

Chi-squared test:
X2 = 1e-07, df = 1, P(> X2) = 1.0

#  Exponentiate the coefficients to interpret as odds-ratios.
exp(coef(CHD_Model))

  (Intercept)         male1           age   education.L   education.Q   education.C    Cholestrol 
 0.000000e+00  2.175865e-46  1.027751e+06 1.609112e+126 1.410109e+188 3.238787e+103  1.425367e+02 
          BMI     heartRate       glucose 
 1.415370e-01  7.732649e+03  6.574869e+06

#  Here is the way to generate odds ratio and respective CIs.
exp(cbind(OR = coef(CHD_Model), confint(CHD_Model)))

Waiting for profiling to be done...
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurredWarning: glm.fit: fitted probabilities numerically 0 or 1 occurred

                       OR         2.5 %        97.5 %
(Intercept)  0.000000e+00  0.000000e+00           Inf
male1        2.175865e-46  0.000000e+00           Inf
age          1.027751e+06  0.000000e+00           Inf
education.L 1.609112e+126  0.000000e+00           Inf
education.Q 1.410109e+188  0.000000e+00           Inf
education.C 3.238787e+103  0.000000e+00           Inf
Cholestrol   1.425367e+02 2.973948e-169 6.831559e+172
BMI          1.415370e-01  0.000000e+00           Inf
heartRate    7.732649e+03  0.000000e+00           Inf
glucose      6.574869e+06  0.000000e+00           Inf

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQojIGNhbGwgcGFja2FnZXMg4oCYZ2xtdG9vbGJveOKAmSBhbmQg4oCYYW9k4oCZDQpsaWJyYXJ5KGdsbXRvb2xib3gpDQpsaWJyYXJ5KGFvZCkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBpbXBvcnQgZGF0YSAoUGF0aWVudF9kYXRhLmNzdikgYXMgYSBkYXRhIGZyYW1lLg0KUGF0aWVudF9kYXRhPC1hcy5kYXRhLmZyYW1lKHJlYWQuY3N2KGZpbGUuY2hvb3NlKCkpKQ0KYGBgDQoNCg0KYGBge3J9DQojIHVuZGVyc3RhbmQgdGhlIHN0cnVjdHVyZSBvZiB0aGUgZGF0YSBhbmQgdGhlIG5hdHVyZSBvZiBlbGVtZW50cw0Kc3RyKFBhdGllbnRfZGF0YSkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBwcmludCB0aGUgZmlyc3QgZml2ZSByb3dzIG9mIHRoZSBkYXRhIA0KaGVhZChQYXRpZW50X2RhdGEsNSkNCg0KYGBgDQoNCg0KYGBge3J9DQojIHByaW50IHRoZSBkaW1lbnNpb24gb2YgdGhlIGRhdGEgZnJhbWUgDQpkaW0oUGF0aWVudF9kYXRhKQ0KYGBgDQpgYGB7cn0NCiMgcHJpbnQgdGhlIHN0YW5kYXJkIGRldmlhdGlvbiBvZiB0aGUgdmFyaWFibGVzLiANCnNhcHBseShQYXRpZW50X2RhdGEsc2QpDQpgYGANCmBgYHtyfQ0KIyBnZW5lcmF0aW5nIGNyb3NzLXRhYnVsYXRpb25zDQp0YWJsZShQYXRpZW50X2RhdGEkQ0hELFBhdGllbnRfZGF0YSRtYWxlLFBhdGllbnRfZGF0YSRlZHVjYXRpb24pDQpgYGANCmBgYHtyfQ0KIyBjaGVjayBmb3IgbWlzc2luZyB2YWx1ZXMNCnN1bShpcy5uYShQYXRpZW50X2RhdGEpKQ0KYGBgDQpgYGB7cn0NCiMgY29udmVydCB0aGUgdmFyaWFibGVzIOKAmENIROKAmSAsJ21hbGUnLGFuZCDigJhFZHVjYXRpb27igJkgdG8gZmFjdG9ycw0KUGF0aWVudF9kYXRhJENIRDwtZmFjdG9yKFBhdGllbnRfZGF0YSRDSEQpDQpQYXRpZW50X2RhdGEkbWFsZTwtZmFjdG9yKFBhdGllbnRfZGF0YSRtYWxlKQ0KUGF0aWVudF9kYXRhJGVkdWNhdGlvbjwtZmFjdG9yKFBhdGllbnRfZGF0YSRlZHVjYXRpb24sIGxldmVscz1jKDEsMiwzLDQpLCBvcmRlcmVkPVQpDQoNCmF0dGFjaChQYXRpZW50X2RhdGEpDQpgYGANCmBgYHtyfQ0KIyBoYXZlIGEgbG9vayBhdCB0aGUgY2hhbmdlcyBpbiB0aGUgZGF0YSBhZnRlciBiZWluZyBjb252ZXJ0ZWQNCnN0cihQYXRpZW50X2RhdGEpDQpgYGANCmBgYHtyfQ0KIyBwcmludCB0aGUgc3VtbWFyeSBvZiB0aGUgZGF0YSBhZnRlciB0aGUgY2hhbmdlcw0Kc3VtbWFyeShQYXRpZW50X2RhdGEpDQpgYGANCmBgYHtyfQ0KIyBpbXBsZW1lbnQgYmluYXJ5IGxvZ2lzdGljIHJlZ3Jlc3Npb24gaW4gUg0KQ0hEX01vZGVsPC1nbG0oQ0hEfm1hbGUrYWdlK2VkdWNhdGlvbitDaG9sZXN0cm9sK0JNSStoZWFydFJhdGUrZ2x1Y29zZSwgZGF0YT1QYXRpZW50X2RhdGEsIGZhbWlseSA9ImJpbm9taWFsIikNCnN1bW1hcnkoQ0hEX01vZGVsKQ0KYGBgDQoNCg0KYGBge3J9DQoNCiNzb2x2aW5nIHRoZSJnbG0uZml0OiBhbGdvcml0aG0gZGlkIG5vdCBjb252ZXJnZSBXYXJuaW5nIiBieSBVc2UgdGhlIHByZWRpY3RvciB2YXJpYWJsZSB0byBwZXJmZWN0bHkgcHJlZGljdCB0aGUgcmVzcG9uc2UgdmFyaWFibGUuDQpwcmVkaWN0KENIRF9Nb2RlbCwgbmV3cGF0aWVudF9kYXRhPWRhdGEuZnJhbWUoQ0hEPWMoMCwgMCwgMCwgMSwgMSwgMSwgMCwgMSwgMCwgMCwgMCwgMCwgMSwgMCkpKQ0KYGBgDQpgYGB7cn0NCnN1bW1hcnkoQ0hEX01vZGVsKQ0KYGBgDQoNCmBgYHtyfQ0KI1RoZSBIb3NtZXItTGVtZXNob3cgZ29vZG5lc3Mtb2YtZml0IHRlc3QNCmhsdGVzdChDSERfTW9kZWwpDQpgYGANCmBgYHtyfQ0KIyBDb25kaWRlbmNlIG9mIGludGVydmFsIGZvciB0aGUgbW9kZWwNCmNvbmZpbnQoQ0hEX01vZGVsKQ0KYGBgDQpgYGB7cn0NCiMgQ29uZGlkZW5jZSBvZiBpbnRlcnZhbCBmb3IgdGhlIG1vZGVsIHVzaW5nIGRlZmF1bHQNCmNvbmZpbnQuZGVmYXVsdChDSERfTW9kZWwpDQpgYGANCg0KDQpgYGB7cn0NCiMgVGVzdCBmb3IgYW4gb3ZlcmFsbCBlZmZlY3Qgb2YgZWR1Y2F0aW9uIHVzaW5nIHRoZSB3YWxkLnRlc3QoKS4NCndhbGQudGVzdChiID0gY29lZihDSERfTW9kZWwpLCBTaWdtYSA9IHZjb3YoQ0hEX01vZGVsKSwgVGVybXMgPSA0OjYpDQpgYGANCg0KYGBge3J9DQojIFRlc3QgZm9yIGFuIG92ZXJhbGwgZWZmZWN0IG9mIG1hbGUgdXNpbmcgdGhlIHdhbGQudGVzdCgpLiANCndhbGQudGVzdChiID0gY29lZihDSERfTW9kZWwpLCBTaWdtYSA9IHZjb3YoQ0hEX01vZGVsKSwgVGVybXMgPSAyKQ0KYGBgDQpgYGB7cn0NCiMgIEV4cG9uZW50aWF0ZSB0aGUgY29lZmZpY2llbnRzIHRvIGludGVycHJldCBhcyBvZGRzLXJhdGlvcy4NCmV4cChjb2VmKENIRF9Nb2RlbCkpDQpgYGANCmBgYHtyfQ0KIyAgSGVyZSBpcyB0aGUgd2F5IHRvIGdlbmVyYXRlIG9kZHMgcmF0aW8gYW5kIHJlc3BlY3RpdmUgQ0lzLg0KZXhwKGNiaW5kKE9SID0gY29lZihDSERfTW9kZWwpLCBjb25maW50KENIRF9Nb2RlbCkpKQ0KYGBgDQoNCg0KDQpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ3RybCtBbHQrSSouDQoNCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ3RybCtTaGlmdCtLKiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLg0KDQpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuDQo=