df = matrix(c(91, 104, 235, 39, 73, 48, 18, 31, 161), nrow = 3, byrow = TRUE)
chisq.test(df)
##
## Pearson's Chi-squared test
##
## data: df
## X-squared = 86.023, df = 4, p-value < 2.2e-16
df = read.csv("C:\\Thach\\VN trips\\2024_4Dec\\SIS Can Tho\\Datasets\\diabetes data.csv")
head(df)
## id age sex height weight systBP diastBP brother parents hypertension bmi
## 1 1 54 Female 150 68.5 150 80 0 0 1 30.44
## 2 2 62 Female 165 75.5 170 80 0 0 1 27.73
## 3 3 49 Female 152 71.0 130 90 1 1 1 30.73
## 4 4 45 Female 154 58.0 110 90 0 1 1 24.46
## 5 5 54 Female 151 54.0 100 80 1 1 0 23.68
## 6 6 46 Female 156 76.0 120 80 0 1 0 31.23
## whr nrisks group
## 1 0.85 3 Normal
## 2 1.00 3 IFG
## 3 0.82 4 Normal
## 4 0.81 3 Normal
## 5 0.80 3 Normal
## 6 0.92 3 Normal
df$diab = ifelse(df$group == "DM", 1, 0)
head(df)
## id age sex height weight systBP diastBP brother parents hypertension bmi
## 1 1 54 Female 150 68.5 150 80 0 0 1 30.44
## 2 2 62 Female 165 75.5 170 80 0 0 1 27.73
## 3 3 49 Female 152 71.0 130 90 1 1 1 30.73
## 4 4 45 Female 154 58.0 110 90 0 1 1 24.46
## 5 5 54 Female 151 54.0 100 80 1 1 0 23.68
## 6 6 46 Female 156 76.0 120 80 0 1 0 31.23
## whr nrisks group diab
## 1 0.85 3 Normal 0
## 2 1.00 3 IFG 0
## 3 0.82 4 Normal 0
## 4 0.81 3 Normal 0
## 5 0.80 3 Normal 0
## 6 0.92 3 Normal 0
library(lessR)
## Warning: package 'lessR' was built under R version 4.3.3
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
m.1 = Logit(diab ~ sex, brief = TRUE, data = df)
##
## >>> Note: sex is not a numeric variable.
## Indicator variables are created and analyzed.
##
## Response Variable: diab
## Predictor Variable 1: sexMale
##
## Number of cases (rows) of data: 3165
## Number of cases retained for analysis: 3165
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of diab for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -2.6174 0.0854 -30.637 0.000 -2.7849 -2.4500
## sexMale 0.3590 0.1376 2.609 0.009 0.0893 0.6286
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.0730 0.0617 0.0863
## sexMale 1.4319 1.0935 1.8750
##
##
## -- Model Fit
##
## Null deviance: 1709.356 on 3164 degrees of freedom
## Residual deviance: 1702.715 on 3163 degrees of freedom
##
## AIC: 1706.715
##
## Number of iterations to convergence: 5
bw = read.csv("C:\\Thach\\VN trips\\2024_4Dec\\SIS Can Tho\\Datasets\\birthwt.csv")
head(bw)
## id low age lwt race smoke ptl ht ui ftv bwt
## 1 85 0 19 182 2 0 0 0 1 0 2523
## 2 86 0 33 155 3 0 0 0 0 3 2551
## 3 87 0 20 105 1 1 0 0 0 1 2557
## 4 88 0 21 108 1 1 0 0 1 2 2594
## 5 89 0 18 107 1 1 0 0 1 0 2600
## 6 91 0 21 124 3 0 0 0 0 0 2622
library(lessR)
Logit(low ~ factor(smoke), data = bw, brief = TRUE)
##
## Response Variable: low
## Predictor Variable 1: smoke
##
## Number of cases (rows) of data: 189
## Number of cases retained for analysis: 189
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of low for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.0871 0.2147 -5.062 0.000 -1.5079 -0.6662
## factor(smoke)1 0.7041 0.3196 2.203 0.028 0.0776 1.3305
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.3372 0.2214 0.5137
## factor(smoke)1 2.0219 1.0807 3.7831
##
##
## -- Model Fit
##
## Null deviance: 234.672 on 188 degrees of freedom
## Residual deviance: 229.805 on 187 degrees of freedom
##
## AIC: 233.8046
##
## Number of iterations to convergence: 4
Logit(low ~ factor(smoke) + age + factor(race), data = bw, brief = TRUE)
##
## Response Variable: low
## Predictor Variable 1: smoke
## Predictor Variable 2: age
## Predictor Variable 3: race
##
## Number of cases (rows) of data: 189
## Number of cases retained for analysis: 189
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of low for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.0076 0.8617 -1.169 0.242 -2.6964 0.6813
## factor(smoke)1 1.1006 0.3719 2.959 0.003 0.3716 1.8295
## age -0.0349 0.0334 -1.044 0.296 -0.1004 0.0306
## factor(race)2 1.0114 0.4934 2.050 0.040 0.0443 1.9785
## factor(race)3 1.0567 0.4060 2.603 0.009 0.2611 1.8524
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.3651 0.0674 1.9764
## factor(smoke)1 3.0058 1.4500 6.2311
## age 0.9657 0.9045 1.0311
## factor(race)2 2.7495 1.0453 7.2319
## factor(race)3 2.8769 1.2983 6.3751
##
##
## -- Model Fit
##
## Null deviance: 234.672 on 188 degrees of freedom
## Residual deviance: 218.862 on 184 degrees of freedom
##
## AIC: 228.8622
##
## Number of iterations to convergence: 4
##
##
## Collinearity
##
## Tolerance VIF
## smoke 0.874 1.144
## age 0.945 1.058
## race 0.123 8.124
Chọn tập tin mới loại bỏ ID và weight
bw1 = bw[, c("low", "age", "lwt", "race", "smoke", "ptl", "ht", "ui", "ftv")]
head(bw1)
## low age lwt race smoke ptl ht ui ftv
## 1 0 19 182 2 0 0 0 1 0
## 2 0 33 155 3 0 0 0 0 3
## 3 0 20 105 1 1 0 0 0 1
## 4 0 21 108 1 1 0 0 1 2
## 5 0 18 107 1 1 0 0 1 0
## 6 0 21 124 3 0 0 0 0 0
Phương pháp stepwise
m.0 = glm(low ~ ., family = binomial, data = bw1)
step(m.0)
## Start: AIC=222.19
## low ~ age + lwt + race + smoke + ptl + ht + ui + ftv
##
## Df Deviance AIC
## - ftv 1 204.33 220.33
## - age 1 205.18 221.18
## <none> 204.19 222.19
## - ui 1 206.58 222.58
## - ptl 1 206.72 222.72
## - lwt 1 208.03 224.03
## - race 1 208.75 224.75
## - smoke 1 209.89 225.89
## - ht 1 211.52 227.52
##
## Step: AIC=220.33
## low ~ age + lwt + race + smoke + ptl + ht + ui
##
## Df Deviance AIC
## - age 1 205.21 219.21
## <none> 204.33 220.33
## - ui 1 206.67 220.67
## - ptl 1 206.83 220.83
## - lwt 1 208.04 222.04
## - race 1 208.77 222.77
## - smoke 1 209.91 223.91
## - ht 1 211.53 225.53
##
## Step: AIC=219.21
## low ~ lwt + race + smoke + ptl + ht + ui
##
## Df Deviance AIC
## <none> 205.21 219.21
## - ptl 1 207.34 219.34
## - ui 1 207.81 219.81
## - lwt 1 209.58 221.58
## - race 1 210.31 222.31
## - smoke 1 211.17 223.17
## - ht 1 212.61 224.61
##
## Call: glm(formula = low ~ lwt + race + smoke + ptl + ht + ui, family = binomial,
## data = bw1)
##
## Coefficients:
## (Intercept) lwt race smoke ptl ht
## -0.80364 -0.01295 0.46913 0.94817 0.49145 1.83316
## ui
## 0.74794
##
## Degrees of Freedom: 188 Total (i.e. Null); 182 Residual
## Null Deviance: 234.7
## Residual Deviance: 205.2 AIC: 219.2
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
xvars = bw1[, -1]
yvar = bw1[, 1]
m.bma = bic.glm(xvars, yvar, strict = FALSE, OR = 20, glm.family = binomial)
summary(m.bma)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = binomial, strict = FALSE, OR = 20)
##
##
## 84 models were selected
## Best 5 models (cumulative posterior probability = 0.2531 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 -0.390128 1.575728 1.45068 1.09291 -2.32488
## age 10.4 -0.004815 0.018070 . . .
## lwt 54.8 -0.008473 0.009253 -0.01865 -0.01707 .
## race 44.3 0.212462 0.280368 . . 0.55898
## smoke 52.1 0.484523 0.552668 . . 1.11668
## ptl 41.2 0.291512 0.410590 . 0.72560 .
## ht 59.7 1.011382 0.999519 1.85551 1.85604 .
## ui 30.0 0.263111 0.470489 . . .
## ftv 2.0 -0.001015 0.024588 . . .
##
## nVar 2 3 2
## BIC -753.82285 -753.75940 -753.62525
## post prob 0.058 0.056 0.052
## model 4 model 5
## Intercept -0.35754 1.06795
## age . .
## lwt -0.01535 -0.01692
## race 0.48955 .
## smoke 1.08002 .
## ptl . .
## ht 1.74427 1.96157
## ui . 0.93000
## ftv . .
##
## nVar 4 3
## BIC -753.44086 -753.11035
## post prob 0.048 0.040
imageplot.bma(m.bma)