# Load the hip fracture practice dataset; empty fields are read as NA.
hip <- read.csv(
  file = "C:\\Users\\Admin\\Desktop\\thong ke\\Datasets for practice\\Hip fracture data.csv",
  na.strings = ""
)
# Preview the first rows to check how the columns were parsed.
head(hip)
##   id     dov gender age      dob visit   v1   v2    v3    v4 wt bmi  ht v5
## 1  3 15/6/89   Male  73   8/6/16     1 0.98 0.88 1.079 1.458 98  32 175 NA
## 2  8 17/4/89 Female  67 11/12/21     1 0.85 0.85 0.966 1.325 72  26 166 18
## 3  9 12/6/90   Male  68   8/1/22     1 0.87 0.84 1.013 1.494 87  26 184 36
## 4 10  4/6/90 Female  62  15/5/28     1 0.62 0.71 0.839 1.214 72  24 173 NA
## 5 23  8/8/89   Male  61  22/9/28     1 0.87 0.60 0.811 1.144 72  24 173 44
## 6 24  3/5/89 Female  76   1/8/13     1 0.76 0.58 0.743 0.980 67  28 156 15
##     v6 v7 v8 v9 hipfx timehip
## 1 39.9  1  0  0     0    0.55
## 2 31.0  0  0  0     0   19.68
## 3 28.6  0  0  0     0    5.05
## 4 28.2  1  0  0     0   18.55
## 5 28.9  1  0  0     0   19.37
## 6 33.3  0  0  0     0   12.30

# Do men and women differ in the proportion with a hip fracture?

# Cross-tabulate fracture status (rows) against gender (columns).
table(hip[["hipfx"]], hip[["gender"]])
##    
##     Female Male
##   0   1512 1087
##   1    142   47
# Fracture proportion among women, computed from the data rather than from
# counts copied by hand out of the table above. hipfx is coded 0/1, so its
# mean is the proportion; rows with a missing gender or hipfx are dropped,
# which matches what table() counts by default.
mean(hip$hipfx[hip$gender == "Female"], na.rm = TRUE)
## [1] 0.08585248
# Fracture proportion among men, computed the same way.
mean(hip$hipfx[hip$gender == "Male"], na.rm = TRUE)
## [1] 0.04144621
library(DescTools)
# Describe fracture status by gender. The recorded output below reports the
# chi-squared and Fisher tests, the odds ratio / relative risks with 95% CIs,
# and the 2x2 table with row and column percentages.
Desc(hip$hipfx ~ hip$gender)
## ------------------------------------------------------------------------- 
## hip$hipfx ~ hip$gender
## 
## 
## Summary: 
## n: 3e+03, rows: 2e+00, columns: 2e+00
## 
## Pearson's Chi-squared test (cont. adj):
##   X-squared = 20.296, df = 1, p-value = 6.635e-06
## Fisher's exact test p-value = 3.505e-06
## McNemar's chi-squared = 725.09, df = 1, p-value < 2.2e-16
## 
##                     estimate lwr.ci upr.ci'
##                                           
## odds ratio             0.460  0.328  0.646
## rel. risk (col1)       0.774  0.709  0.846
## rel. risk (col2)       1.682  1.307  2.164
## 
## 
## Phi-Coefficient        0.087
## Contingency Coeff.     0.086
## Cramer's V             0.087
## 
##                                              
##             hip$gender   Female   Male    Sum
## hip$hipfx                                    
##                                              
## 0           freq          2e+03  1e+03  3e+03
##             perc          54.2%  39.0%  93.2%
##             p.row         58.2%  41.8%      .
##             p.col         91.4%  95.9%      .
##                                              
## 1           freq          1e+02  5e+01  2e+02
##             perc           5.1%   1.7%   6.8%
##             p.row         75.1%  24.9%      .
##             p.col          8.6%   4.1%      .
##                                              
## Sum         freq          2e+03  1e+03  3e+03
##             perc          59.3%  40.7% 100.0%
##             p.row             .      .      .
##             p.col             .      .      .
##                                              
## 
## ----------
## ' 95% conf. level

# Univariable logistic regression: hip fracture as a function of gender.
m <- glm(hipfx ~ gender, data = hip, family = binomial)
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
# Report the fitted model as an odds ratio with its 95% CI and p-values.
logistic.display(m)
## 
## Logistic regression predicting hipfx 
##  
##                        OR(95%CI)         P(Wald's test) P(LR-test)
## gender: Male vs Female 0.46 (0.33,0.65)  < 0.001        < 0.001   
##                                                                   
## Log-likelihood = -679.98
## No. of observations = 2788
## AIC value = 1363.96
# Distribution of BMI by fracture status (hipfx = 0 / 1).
boxplot(hip$bmi ~ hip$hipfx)

# Is BMI associated with hip fracture?

m2 <- glm(hipfx ~ bmi, data = hip, family = binomial)
logistic.display(m2)
## 
## Logistic regression predicting hipfx 
##  
##                  OR(95%CI)         P(Wald's test) P(LR-test)
## bmi (cont. var.) 0.84 (0.81,0.88)  < 0.001        < 0.001   
##                                                             
## Log-likelihood = -615.9691
## No. of observations = 2754
## AIC value = 1235.9381
## When BMI increases by 1 unit, the odds of fracture decrease by 16%.

# Relationship between bone density (v2) and hip fracture.

boxplot(hip$v2 ~ hip$hipfx, col = c("blue", "red"))

# There is one outlier, so it has to be removed.

# Keep only rows with v2 below 2; rows with a missing v2 are dropped as well.
hh <- hip[which(hip$v2 < 2), ]
boxplot(hh$v2 ~ hh$hipfx, col = c("blue", "red"))

summary(hh$v2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1700  0.5800  0.6900  0.6947  0.8000  1.9000
# Rescale bone density so that one unit of v2.n equals 0.1 units of v2; the
# odds ratio is then reported per 0.1 change in v2.
hh[["v2.n"]] <- hh[["v2"]] / 0.1
m3 <- glm(hipfx ~ v2.n, data = hh, family = binomial)
logistic.display(m3)
## 
## Logistic regression predicting hipfx 
##  
##                   OR(95%CI)         P(Wald's test) P(LR-test)
## v2.n (cont. var.) 0.41 (0.36,0.47)  < 0.001        < 0.001   
##                                                              
## Log-likelihood = -525.8766
## No. of observations = 2722
## AIC value = 1055.7531
## Assess how well bone density discriminates hip fracture, based on the AUC.
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
## 
##     ci
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Fitted fracture probabilities from the bone-density model (m3).
hh$predicted <- predict(object = m3, type = "response")
# ROC curve of observed fracture status against the fitted probabilities.
mm <- roc(response = hh$hipfx, predictor = hh$predicted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(mm)
## Area under the curve: 0.8249
plot(mm)

# Confidence interval for the AUC.
ci(mm)
## 95% CI: 0.7944-0.8554 (DeLong)

# Repeat the discrimination analysis with BMI as the predictor of fracture.
# Some patients have no BMI value, so those rows are removed first.

summary(hh$bmi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.00   24.00   26.00   26.64   29.00   57.00       7
# Keep rows with a positive BMI; rows with a missing BMI are dropped as well.
bb <- hh[which(hh$bmi > 0), ]
# NOTE: m3 and mm are reused here, replacing the bone-density model and its
# ROC object from the previous section.
m3 <- glm(hipfx ~ bmi, data = bb, family = binomial)
bb$predicted <- predict(object = m3, type = "response")
mm <- roc(response = bb$hipfx, predictor = bb$predicted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(mm)
## Area under the curve: 0.6809
plot(mm)

ci(mm)
## 95% CI: 0.6406-0.7213 (DeLong)
# Smoothed version of the same ROC curve.
plot(smooth(mm))

# Conclusion: the area under the ROC curve is 0.6809; an AUC of about 0.98 would indicate excellent discrimination between people with and without a fracture.

# Compare the proportion (risk) of hip fracture between men and women.
# Men have a lower fracture risk than women.
m <- glm(hipfx ~ gender, data = hip, family = binomial)
summary(m)
## 
## Call:
## glm(formula = hipfx ~ gender, family = binomial, data = hip)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4237  -0.4237  -0.4237  -0.2910   2.5232  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.36536    0.08777 -26.949  < 2e-16 ***
## genderMale  -0.77567    0.17289  -4.486 7.24e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1382.2  on 2787  degrees of freedom
## Residual deviance: 1360.0  on 2786  degrees of freedom
## AIC: 1364
## 
## Number of Fisher Scoring iterations: 5
# Higher bone density ==> lower fracture risk.
# Check whether men have higher bone density than women
# (the recorded output below shows a higher mean v2 in men).
t.test(hh$v2 ~ hh$gender, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  hh$v2 by hh$gender
## t = -11.844, df = 2360.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08835424 -0.06325396
## sample estimates:
## mean in group Female   mean in group Male 
##            0.6639450            0.7397491
# Does the gender difference remain once bone density is in the model?
# Fits fracture on rescaled bone density (v2.n) together with gender.
m4 <- glm(hipfx ~ v2.n + gender, data = hh, family = binomial)
logistic.display(m4)
## 
## Logistic regression predicting hipfx 
##  
##                        crude OR(95%CI)   adj. OR(95%CI)    P(Wald's test)
## v2.n (cont. var.)      0.41 (0.36,0.47)  0.41 (0.36,0.47)  < 0.001       
##                                                                          
## gender: Male vs Female 0.49 (0.35,0.69)  0.85 (0.58,1.23)  0.376         
##                                                                          
##                        P(LR-test)
## v2.n (cont. var.)      < 0.001   
##                                  
## gender: Male vs Female 0.373     
##                                  
## Log-likelihood = -525.4792
## No. of observations = 2722
## AIC value = 1056.9584
# The result shows that, after adjusting for bone density, the difference between men and women is not statistically significant.

# After adjusting for BMI, bone density is independent of BMI but not independent of BMD.

# Build the inputs for Bayesian model averaging.
# Complete cases are required only on the outcome and the candidate
# predictors. Calling na.omit() on the whole data frame would also discard
# rows whose only missing values are in columns that never enter the model.
bma_predictors <- c("gender", "age", "v1", "v2", "v3", "v4", "v5", "v6",
                    "v7", "v8", "v9", "bmi")
hh <- hh[complete.cases(hh[, c("hipfx", bma_predictors)]), ]
# Candidate predictors (data frame) and the 0/1 fracture outcome (vector).
xvars <- hh[, bma_predictors]
yvars <- hh[, "hipfx"]
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-7)
## 
## Attaching package: 'rrcov'
## The following object is masked from 'package:DescTools':
## 
##     Cov
# Bayesian model averaging over logistic models of fracture on the candidate
# predictors. FALSE is spelled out because F is an ordinary variable that can
# be reassigned, whereas FALSE is a reserved word.
m <- bic.glm(xvars, yvars, strict = FALSE, OR = 20, glm.family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Summarise the BMA fit; the recorded output below lists, per variable, the
# posterior inclusion probability (p!=0) and the coefficients of the best models.
summary(m)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = yvars, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   7  models were selected
##  Best  5  models (cumulative posterior probability =  0.9371 ): 
## 
##                p!=0    EV         SD        model 1     model 2   
## Intercept      100    -1.1497190  1.368386  -1.021e+00  -1.200e+00
## gender.x         3.0                                              
##         .Male          0.0153228  0.096747       .           .    
## age.x          100.0   0.0771649  0.013818   7.658e-02   7.963e-02
## v1.x            24.1  -0.6892902  1.357276       .      -2.528e+00
## v2.x            13.0  -0.5054618  1.467008       .           .    
## v3.x            95.4  -7.9421840  2.286455  -9.064e+00  -6.775e+00
## v4.x             0.0   0.0000000  0.000000       .           .    
## v5.x             5.6  -0.0009949  0.004752       .           .    
## v6.x             3.3   0.0011420  0.007505       .           .    
## v7.x             0.0   0.0000000  0.000000       .           .    
## v8.x             0.0   0.0000000  0.000000       .           .    
## v9.x             0.0   0.0000000  0.000000       .           .    
## bmi.x            0.0   0.0000000  0.000000       .           .    
##                                                                   
## nVar                                           2           3      
## BIC                                         -1.707e+04  -1.707e+04
## post prob                                    0.586       0.166    
##                model 3     model 4     model 5   
## Intercept      -1.223e+00  -5.573e-01  -2.551e+00
## gender.x                                         
##         .Male       .           .           .    
## age.x           7.476e-02   7.129e-02   8.253e-02
## v1.x                .           .      -3.621e+00
## v2.x           -3.033e+00       .      -5.500e+00
## v3.x           -6.269e+00  -8.635e+00       .    
## v4.x                .           .           .    
## v5.x                .      -1.773e-02       .    
## v6.x                .           .           .    
## v7.x                .           .           .    
## v8.x                .           .           .    
## v9.x                .           .           .    
## bmi.x               .           .           .    
##                                                  
## nVar              3           3           3      
## BIC            -1.707e+04  -1.707e+04  -1.707e+04
## post prob       0.084       0.056       0.046
# Image plot of the BMA result (m).
imageplot.bma(m)

# BMA indicates that the relevant variables are age and v3. Next, evaluate the model with age and v3.

# Final model: hip fracture as a function of age and v3.
fmodel <- glm(hipfx ~ age + v3, data = hh, family = binomial)
# Compute the predicted (fitted) fracture probabilities.
hh$predicted <- predict(object = fmodel, type = "response")
mm <- roc(response = hh$hipfx, predictor = hh$predicted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(mm)
## Area under the curve: 0.8645
ci(mm)
## 95% CI: 0.8359-0.893 (DeLong)
plot(smooth(mm))

# Odds-ratio table for the age + v3 model.
# v3 is expressed per 0.1 unit, as was done for v2.n. Per whole unit of v3 the
# odds ratio is so small that logistic.display() prints it as "0 (0,0)", which
# cannot be interpreted. The fit itself (log-likelihood, AIC) is unchanged.
hh$v3.n <- hh$v3 / 0.1
m4 <- glm(hipfx ~ age + v3.n, data = hh, family = binomial)
logistic.display(m4)
## 
## Logistic regression predicting hipfx 
##  
##                  crude OR(95%CI)   adj. OR(95%CI)    P(Wald's test)
## age (cont. var.) 1.15 (1.12,1.17)  1.08 (1.05,1.11)  < 0.001       
##                                                                    
## v3 (cont. var.)  0 (0,0)           0 (0,0)           < 0.001       
##                                                                    
##                  P(LR-test)
## age (cont. var.) < 0.001   
##                            
## v3 (cont. var.)  < 0.001   
##                            
## Log-likelihood = -419.4374
## No. of observations = 2315
## AIC value = 844.8749