# Load the bone-fracture dataset from a local CSV export.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists; consider a project-relative path.
df <- read.csv("C:\\Thach\\VN trips\\2026_1Jan\\PN Institute\\Datasets\\Bone data.csv")
# Number of rows and columns that were read.
dim(df)
## [1] 2162 9
# Preview the first rows to check the column names and how values are coded.
head(df)
## id sex age weight height prior.fx fnbmd smoking fx
## 1 1 Male 73 98 175 0 1.08 1 0
## 2 2 Female 68 72 166 0 0.97 0 0
## 3 3 Male 68 87 184 0 1.01 0 0
## 4 4 Female 62 72 173 0 0.84 1 0
## 5 5 Male 61 72 173 0 0.81 1 0
## 6 6 Female 76 57 156 0 0.74 0 0
# Crude (unadjusted) logistic regression of fracture status (fx) on smoking.
model <- glm(
  formula = fx ~ smoking,
  family = binomial,
  data = df
)
# Coefficients on the log-odds scale, plus deviances and AIC.
summary(model)
##
## Call:
## glm(formula = fx ~ smoking, family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03006 0.06442 -15.990 <2e-16 ***
## smoking -0.13796 0.10081 -1.368 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2439.5 on 2160 degrees of freedom
## AIC: 2443.5
##
## Number of Fisher Scoring iterations: 4
# Odds ratios with their 2.5 % / 97.5 % confidence limits for the crude model:
# the coefficients and the confint() bounds are each moved from the log-odds
# scale to the odds scale, then placed side by side.
cbind(
  Odds_Ratio = exp(coef(model)),
  exp(confint(model))
)
## Waiting for profiling to be done...
## Odds_Ratio 2.5 % 97.5 %
## (Intercept) 0.3569869 0.3142326 0.4045364
## smoking 0.8711365 0.7144024 1.0608018
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 4.3.2
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
# Summarise the crude model with epiDisplay: odds ratio with 95% CI, plus the
# Wald and likelihood-ratio test p-values for smoking.
logistic.display(model)
##
## Logistic regression predicting fx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## smoking: 1 vs 0 0.87 (0.71,1.06) 0.171 0.17
##
## Log-likelihood = -1219.7469
## No. of observations = 2162
## AIC value = 2443.4938
library(lessR)
## Warning: package 'lessR' was built under R version 4.3.3
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
# Refit the crude model with lessR's Logit(). Besides the coefficient table it
# prints odds ratios, residual/influence diagnostics and a confusion matrix
# at a 0.5 probability threshold.
model.lessR <- Logit(fx ~ smoking, data = df)
##
## Response Variable: fx
## Predictor Variable 1: smoking
##
## Number of cases (rows) of data: 2162
## Number of cases retained for analysis: 2162
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of fx for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.0301 0.0644 -15.990 0.000 -1.1563 -0.9038
## smoking -0.1380 0.1008 -1.368 0.171 -0.3355 0.0596
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.3570 0.3146 0.4050
## smoking 0.8711 0.7149 1.0614
##
##
## -- Model Fit
##
## Null deviance: 2441.375 on 2161 degrees of freedom
## Residual deviance: 2439.494 on 2160 degrees of freedom
##
## AIC: 2443.494
##
## Number of iterations to convergence: 4
##
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 2162 cases (rows) of data]
## --------------------------------------------------------------------
## smoking fx P(Y=1) residual rstudent dffits cooks
## 36 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 39 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 55 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 56 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 63 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 65 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 120 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 129 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 133 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 135 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 141 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 146 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 150 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 163 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 169 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 173 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 181 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 182 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 197 1 1 0.2372 0.7628 1.697 0.05273 0.001753
## 203 1 1 0.2372 0.7628 1.697 0.05273 0.001753
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## smoking fx label fitted std.err
## 1 1 0 0 0.2372 0.01403
## 4 1 0 0 0.2372 0.01403
## 5 1 0 0 0.2372 0.01403
## 7 1 0 0 0.2372 0.01403
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## smoking fx label fitted std.err
## 2148 1 0 0 0.2372 0.01403
## 2150 1 1 0 0.2372 0.01403
## 2 0 0 0 0.2631 0.01249
## 3 0 0 0 0.2631 0.01249
## 6 0 0 0 0.2631 0.01249
##
## ... for the last 4 rows of sorted data ...
##
## smoking fx label fitted std.err
## 2159 0 0 0 0.2631 0.01249
## 2160 0 1 0 0.2631 0.01249
## 2161 0 0 0 0.2631 0.01249
## 2162 0 1 0 0.2631 0.01249
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
## Corresponding cutoff threshold for smoking: -7.467
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 545 25.2 545 0 0.0
## fx 0 1617 74.8 1617 0 100.0
## ---------------------------------------------------
## Total 2162 74.8
##
## Accuracy: 74.79
## Sensitivity: 0.00
## Precision: NaN
# Standard summary of the object returned by Logit(); the coefficient table
# matches the one printed by summary(model).
summary(model.lessR)
##
## Call:
## glm(formula = my_formula, family = "binomial", data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03006 0.06442 -15.990 <0.0000000000000002 ***
## smoking -0.13796 0.10081 -1.368 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2439.5 on 2160 degrees of freedom
## AIC: 2443.5
##
## Number of Fisher Scoring iterations: 4
PROMPT: Tôi có dữ liệu đánh giá nguy cơ gãy xương. Bạn giúp viết lệnh xây dựng mô hình hồi qui logistic đánh giá mối liên quan giữa hút thuốc (smoking: 0= No; 1= Yes) và nguy cơ gãy xương (fx: 0= No; 1= Yes).
# Logistic regression of fracture status (fx) on smoking, adjusted for sex.
model.2 <- glm(
  formula = fx ~ smoking + sex,
  family = binomial,
  data = df
)
# Coefficients on the log-odds scale, plus deviances and AIC.
summary(model.2)
##
## Call:
## glm(formula = fx ~ smoking + sex, family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.85598 0.06835 -12.523 < 0.0000000000000002 ***
## smoking 0.09872 0.10724 0.921 0.357
## sexMale -0.78880 0.11493 -6.863 0.00000000000673 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2389.6 on 2159 degrees of freedom
## AIC: 2395.6
##
## Number of Fisher Scoring iterations: 4
# Odds ratios with their 2.5 % / 97.5 % confidence limits for the sex-adjusted
# model: the coefficients and the confint() bounds are each exponentiated,
# then placed side by side.
cbind(
  Odds_Ratio = exp(coef(model.2)),
  exp(confint(model.2))
)
## Waiting for profiling to be done...
## Odds_Ratio 2.5 % 97.5 %
## (Intercept) 0.4248646 0.3711574 0.4852461
## smoking 1.1037584 0.8942327 1.3617209
## sexMale 0.4543898 0.3619039 0.5680105
# Refit the sex-adjusted model with lessR's Logit(). sex is not numeric, so
# Logit() reports creating the sexMale indicator variable itself.
model2.lessR <- Logit(fx ~ smoking + sex, data = df)
##
## >>> Note: sex is not a numeric variable.
## Indicator variables are created and analyzed.
##
## Response Variable: fx
## Predictor Variable 1: smoking
## Predictor Variable 2: sexMale
##
## Number of cases (rows) of data: 2162
## Number of cases retained for analysis: 2162
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of fx for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -0.8560 0.0684 -12.523 0.000 -0.9900 -0.7220
## smoking 0.0987 0.1072 0.921 0.357 -0.1115 0.3089
## sexMale -0.7888 0.1149 -6.863 0.000 -1.0141 -0.5635
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.4249 0.3716 0.4858
## smoking 1.1038 0.8945 1.3619
## sexMale 0.4544 0.3627 0.5692
##
##
## -- Model Fit
##
## Null deviance: 2441.375 on 2161 degrees of freedom
## Residual deviance: 2389.599 on 2159 degrees of freedom
##
## AIC: 2395.599
##
## Number of iterations to convergence: 4
##
##
## Collinearity
##
## Tolerance VIF
## smoking 0.899 1.112
## sexMale 0.899 1.112
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 2162 cases (rows) of data]
## --------------------------------------------------------------------
## smoking sexMale fx P(Y=1) residual rstudent dffits cooks
## 18 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 183 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 212 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 217 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 221 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 263 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 308 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 323 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 491 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 545 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 561 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 563 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 628 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 680 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 744 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 762 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 770 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 812 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 821 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
## 951 0 1 1 0.1618 0.8382 1.911 0.07645 0.003063
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## smoking sexMale fx label fitted std.err
## 3 0 1 0 0 0.1618 0.01548
## 14 0 1 0 0 0.1618 0.01548
## 18 0 1 1 0 0.1618 0.01548
## 24 0 1 0 0 0.1618 0.01548
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## smoking sexMale fx label fitted std.err
## 2160 0 0 1 0 0.2982 0.01430
## 2162 0 0 1 0 0.2982 0.01430
## 4 1 0 0 0 0.3192 0.02074
## 20 1 0 0 0 0.3192 0.02074
## 32 1 0 0 0 0.3192 0.02074
##
## ... for the last 4 rows of sorted data ...
##
## smoking sexMale fx label fitted std.err
## 2146 1 0 1 0 0.3192 0.02074
## 2147 1 0 0 0 0.3192 0.02074
## 2148 1 0 0 0 0.3192 0.02074
## 2150 1 0 1 0 0.3192 0.02074
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 545 25.2 545 0 0.0
## fx 0 1617 74.8 1617 0 100.0
## ---------------------------------------------------
## Total 2162 74.8
##
## Accuracy: 74.79
## Sensitivity: 0.00
## Precision: NaN
# Standard summary of the object returned by Logit(); the coefficient table
# matches the one printed by summary(model.2).
summary(model2.lessR)
##
## Call:
## glm(formula = my_formula, family = "binomial", data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.85598 0.06835 -12.523 < 0.0000000000000002 ***
## smoking 0.09872 0.10724 0.921 0.357
## sexMale -0.78880 0.11493 -6.863 0.00000000000673 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2389.6 on 2159 degrees of freedom
## AIC: 2395.6
##
## Number of Fisher Scoring iterations: 4
PROMPT: Y văn ghi nhận giới tính (sex) là yếu tố gây nhiễu. Bạn thực hiện mô hình hồi qui logistic đa biến đánh giá mối liên quan độc lập giữa hút thuốc và nguy cơ gãy xương sau khi hiệu chỉnh cho giới tính.
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
# Complete-case data for model selection: keep the outcome (fx) and the seven
# candidate predictors, dropping every row that has a missing value.
df_bma <- na.omit(df[, c("fx", "age", "sex", "weight", "height", "fnbmd", "smoking", "prior.fx")])
# Bayesian Model Averaging (BMA package) over binomial GLMs of fx, with all
# seven candidate predictors offered to the search.
bma_model <- bic.glm(
x = df_bma[, c("age", "sex", "weight", "height", "fnbmd", "smoking", "prior.fx")],
y = df_bma$fx,
glm.family = binomial()
)
# Per-variable inclusion probabilities (p!=0), averaged coefficients (EV, SD)
# and the selected models with their BIC and posterior probabilities.
summary(bma_model)
##
## Call:
## bic.glm.data.frame(x = df_bma[, c("age", "sex", "weight", "height", "fnbmd", "smoking", "prior.fx")], y = df_bma$fx, glm.family = binomial())
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 2.249962 0.674258 2.50941 2.33985 1.13970
## age 16.2 0.002663 0.006838 . . 0.01602
## sex 18.2 -0.048048 0.114551 . -0.25782 .
## weight 0.0 0.000000 0.000000 . . .
## height 0.0 0.000000 0.000000 . . .
## fnbmd 100.0 -4.487636 0.436674 -4.59534 -4.27923 -4.30244
## smoking 0.0 0.000000 0.000000 . . .
## prior.fx 100.0 0.530729 0.134289 0.53062 0.54454 0.51562
##
## nVar 2 3 3
## BIC -14024.61986 -14021.44672 -14021.13922
## post prob 0.696 0.142 0.122
## model 4
## Intercept 0.79779
## age 0.01780
## sex -0.28507
## weight .
## height .
## fnbmd -3.91809
## smoking .
## prior.fx 0.52962
##
## nVar 4
## BIC -14018.89680
## post prob 0.040
# Plot the models selected by BMA and the variables each one includes.
imageplot.bma(bma_model)
# Rescale bone mineral density so its odds ratio is per 0.15-unit change.
# NOTE(review): 0.15 is presumably the standard deviation of fnbmd (the new
# column is named fnbmd.sd) -- confirm against sd(df_bma$fnbmd).
df_bma$fnbmd.sd <- df_bma$fnbmd / 0.15
# Refit the highest-posterior-probability BMA model (fnbmd + prior.fx) as an
# ordinary logistic regression on the rescaled predictor.
bma_best <- glm(fx ~ fnbmd.sd + prior.fx, family = binomial, data = df_bma)
# Odds ratios with their 2.5 % / 97.5 % confidence limits.
exp(cbind(Odds_Ratio = coef(bma_best), confint(bma_best)))
## Waiting for profiling to be done...
## Odds_Ratio 2.5 % 97.5 %
## (Intercept) 12.2977330 6.6858383 22.888616
## fnbmd.sd 0.5019265 0.4467839 0.562099
## prior.fx 1.6999821 1.3051619 2.207473
PROMPT: Xây dựng mô hình tối ưu dự báo gãy xương từ những biến số như tuổi (age), giới tính (sex), cân nặng (weight), chiều cao (height), mật độ xương (fnbmd), hút thuốc lá (smoking) và tiền căn gãy xương (prior.fx). Dùng phương pháp Bayesian Model Averaging từ gói lệnh BMA.