KHCT Ngay 5

df = matrix(c(91, 104, 235, 39, 73, 48, 18, 31, 161), nrow = 3, byrow = TRUE)
chisq.test(df)

## 
##  Pearson's Chi-squared test
## 
## data:  df
## X-squared = 86.023, df = 4, p-value < 2.2e-16

df = read.csv("C:\\Users\\Luke Do\\Dropbox\\PC\\Downloads\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\Diabetes data.csv")
head(df)

##   id age    sex height weight systBP diastBP brother parents hypertension   bmi
## 1  1  54 Female    150   68.5    150      80       0       0            1 30.44
## 2  2  62 Female    165   75.5    170      80       0       0            1 27.73
## 3  3  49 Female    152   71.0    130      90       1       1            1 30.73
## 4  4  45 Female    154   58.0    110      90       0       1            1 24.46
## 5  5  54 Female    151   54.0    100      80       1       1            0 23.68
## 6  6  46 Female    156   76.0    120      80       0       1            0 31.23
##    whr nrisks  group
## 1 0.85      3 Normal
## 2 1.00      3    IFG
## 3 0.82      4 Normal
## 4 0.81      3 Normal
## 5 0.80      3 Normal
## 6 0.92      3 Normal

df$diab = ifelse(df$group == "DM", 1, 0)
head(df)

##   id age    sex height weight systBP diastBP brother parents hypertension   bmi
## 1  1  54 Female    150   68.5    150      80       0       0            1 30.44
## 2  2  62 Female    165   75.5    170      80       0       0            1 27.73
## 3  3  49 Female    152   71.0    130      90       1       1            1 30.73
## 4  4  45 Female    154   58.0    110      90       0       1            1 24.46
## 5  5  54 Female    151   54.0    100      80       1       1            0 23.68
## 6  6  46 Female    156   76.0    120      80       0       1            0 31.23
##    whr nrisks  group diab
## 1 0.85      3 Normal    0
## 2 1.00      3    IFG    0
## 3 0.82      4 Normal    0
## 4 0.81      3 Normal    0
## 5 0.80      3 Normal    0
## 6 0.92      3 Normal    0

library(lessR)

## 
## lessR 4.3.9                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")   Read text, Excel, SPSS, SAS, or R data file
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()

## 
## Attaching package: 'lessR'

## The following object is masked from 'package:base':
## 
##     sort_by

m.1 = Logit(diab ~ sex, brief = TRUE, data = df)

## 
## >>> Note:  sex is not a numeric variable.
##            Indicator variables are created and analyzed.
## 
## Response Variable:   diab
## Predictor Variable 1:  sexMale
## 
## Number of cases (rows) of data:  3165 
## Number of cases retained for analysis:  3165 
## 
## 
##    BASIC ANALYSIS 
## 
## -- Estimated Model of diab for the Logit of Reference Group Membership
## 
##              Estimate    Std Err  z-value  p-value   Lower 95%   Upper 95%
## (Intercept)   -2.6174     0.0854  -30.637    0.000     -2.7849     -2.4500 
##     sexMale    0.3590     0.1376    2.609    0.009      0.0893      0.6286 
## 
## 
## -- Odds Ratios and Confidence Intervals
## 
##              Odds Ratio   Lower 95%   Upper 95%
## (Intercept)      0.0730      0.0617      0.0863 
##     sexMale      1.4319      1.0935      1.8750 
## 
## 
## -- Model Fit
## 
##     Null deviance: 1709.356 on 3164 degrees of freedom
## Residual deviance: 1702.715 on 3163 degrees of freedom
## 
## AIC: 1706.715 
## 
## Number of iterations to convergence: 5

VIEC 3

bw=read.csv("C:\\Users\\Luke Do\\Dropbox\\PC\\Downloads\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\birthwt.csv")
str(bw)

## 'data.frame':    189 obs. of  11 variables:
##  $ id   : int  85 86 87 88 89 91 92 93 94 95 ...
##  $ low  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
##  $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
##  $ race : int  2 3 1 1 1 3 1 3 1 1 ...
##  $ smoke: int  0 0 1 1 1 0 0 0 1 1 ...
##  $ ptl  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ht   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ui   : int  1 0 0 1 1 0 0 0 0 0 ...
##  $ ftv  : int  0 3 1 2 0 0 1 1 1 0 ...
##  $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...

library(lessR)
Logit(low ~ factor(smoke), data = bw, brief = TRUE)

## 
## Response Variable:   low
## Predictor Variable 1:  smoke
## 
## Number of cases (rows) of data:  189 
## Number of cases retained for analysis:  189 
## 
## 
##    BASIC ANALYSIS 
## 
## -- Estimated Model of low for the Logit of Reference Group Membership
## 
##                 Estimate    Std Err  z-value  p-value   Lower 95%   Upper 95%
##    (Intercept)   -1.0871     0.2147   -5.062    0.000     -1.5079     -0.6662 
## factor(smoke)1    0.7041     0.3196    2.203    0.028      0.0776      1.3305 
## 
## 
## -- Odds Ratios and Confidence Intervals
## 
##                 Odds Ratio   Lower 95%   Upper 95%
##    (Intercept)      0.3372      0.2214      0.5137 
## factor(smoke)1      2.0219      1.0807      3.7831 
## 
## 
## -- Model Fit
## 
##     Null deviance: 234.672 on 188 degrees of freedom
## Residual deviance: 229.805 on 187 degrees of freedom
## 
## AIC: 233.8046 
## 
## Number of iterations to convergence: 4

Logit(low ~ factor(smoke) + age + factor(race), data = bw, brief = TRUE)

## 
## Response Variable:   low
## Predictor Variable 1:  smoke
## Predictor Variable 2:  age
## Predictor Variable 3:  race
## 
## Number of cases (rows) of data:  189 
## Number of cases retained for analysis:  189 
## 
## 
##    BASIC ANALYSIS 
## 
## -- Estimated Model of low for the Logit of Reference Group Membership
## 
##                 Estimate    Std Err  z-value  p-value   Lower 95%   Upper 95%
##    (Intercept)   -1.0076     0.8617   -1.169    0.242     -2.6964      0.6813 
## factor(smoke)1    1.1006     0.3719    2.959    0.003      0.3716      1.8295 
##            age   -0.0349     0.0334   -1.044    0.296     -0.1004      0.0306 
##  factor(race)2    1.0114     0.4934    2.050    0.040      0.0443      1.9785 
##  factor(race)3    1.0567     0.4060    2.603    0.009      0.2611      1.8524 
## 
## 
## -- Odds Ratios and Confidence Intervals
## 
##                 Odds Ratio   Lower 95%   Upper 95%
##    (Intercept)      0.3651      0.0674      1.9764 
## factor(smoke)1      3.0058      1.4500      6.2311 
##            age      0.9657      0.9045      1.0311 
##  factor(race)2      2.7495      1.0453      7.2319 
##  factor(race)3      2.8769      1.2983      6.3751 
## 
## 
## -- Model Fit
## 
##     Null deviance: 234.672 on 188 degrees of freedom
## Residual deviance: 218.862 on 184 degrees of freedom
## 
## AIC: 228.8622 
## 
## Number of iterations to convergence: 4 
## 
## 
## Collinearity
## 
##       Tolerance       VIF
## smoke     0.874     1.144
## age       0.945     1.058
## race      0.123     8.124

VIEC 4

bw1 = bw[, c("low", "age", "lwt", "race", "smoke", "ptl", "ht", "ui", "ftv")]
head(bw1)

##   low age lwt race smoke ptl ht ui ftv
## 1   0  19 182    2     0   0  0  1   0
## 2   0  33 155    3     0   0  0  0   3
## 3   0  20 105    1     1   0  0  0   1
## 4   0  21 108    1     1   0  0  1   2
## 5   0  18 107    1     1   0  0  1   0
## 6   0  21 124    3     0   0  0  0   0

library(BMA)

## Loading required package: survival

## Loading required package: leaps

## Loading required package: robustbase

## 
## Attaching package: 'robustbase'

## The following object is masked from 'package:survival':
## 
##     heart

## Loading required package: inline

## Loading required package: rrcov

## Scalable Robust Estimators with High Breakdown Point (version 1.7-6)

xvars = bw1[, -1]
yvar = bw1[, 1]
m.bma = bic.glm(xvars, yvar, strict = FALSE, OR = 20, glm.family = binomial)
summary(m.bma)

## 
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = binomial,     strict = FALSE, OR = 20)
## 
## 
##   84  models were selected
##  Best  5  models (cumulative posterior probability =  0.2531 ): 
## 
##            p!=0   EV        SD        model 1     model 2     model 3   
## Intercept  100   -0.390128  1.575728     1.45068     1.09291    -2.32488
## age        10.4  -0.004815  0.018070       .           .           .    
## lwt        54.8  -0.008473  0.009253    -0.01865    -0.01707       .    
## race       44.3   0.212462  0.280368       .           .         0.55898
## smoke      52.1   0.484523  0.552668       .           .         1.11668
## ptl        41.2   0.291512  0.410590       .         0.72560       .    
## ht         59.7   1.011382  0.999519     1.85551     1.85604       .    
## ui         30.0   0.263111  0.470489       .           .           .    
## ftv         2.0  -0.001015  0.024588       .           .           .    
##                                                                         
## nVar                                       2           3           2    
## BIC                                   -753.82285  -753.75940  -753.62525
## post prob                                0.058       0.056       0.052  
##            model 4     model 5   
## Intercept    -0.35754     1.06795
## age             .           .    
## lwt          -0.01535    -0.01692
## race          0.48955       .    
## smoke         1.08002       .    
## ptl             .           .    
## ht            1.74427     1.96157
## ui              .         0.93000
## ftv             .           .    
##                                  
## nVar            4           3    
## BIC        -753.44086  -753.11035
## post prob     0.048       0.040

imageplot.bma(m.bma)

KHCT Ngay 5

2025-01-06