Doc du lieu: loai bo cac o trong trong excel
t = "C:\\Users\\SONY\\Google Drive\\HOC TAP\\CH 22\\Phan tich du lieu\\Workshop GS Tuan\\Phan tich du lieu va ung dung\\Datasets for practice\\Hip fracture data.csv"
hip = read.csv(t, na.strings = "")
attach(hip)
head(hip)
## id dov gender age dob visit v1 v2 v3 v4 wt bmi ht v5
## 1 3 15/6/89 Male 73 8/6/16 1 0.98 0.88 1.079 1.458 98 32 175 NA
## 2 8 17/4/89 Female 67 11/12/21 1 0.85 0.85 0.966 1.325 72 26 166 18
## 3 9 12/6/90 Male 68 8/1/22 1 0.87 0.84 1.013 1.494 87 26 184 36
## 4 10 4/6/90 Female 62 15/5/28 1 0.62 0.71 0.839 1.214 72 24 173 NA
## 5 23 8/8/89 Male 61 22/9/28 1 0.87 0.60 0.811 1.144 72 24 173 44
## 6 24 3/5/89 Female 76 1/8/13 1 0.76 0.58 0.743 0.980 67 28 156 15
## v6 v7 v8 v9 hipfx timehip
## 1 39.9 1 0 0 0 0.55
## 2 31.0 0 0 0 0 19.68
## 3 28.6 0 0 0 0 5.05
## 4 28.2 1 0 0 0 18.55
## 5 28.9 1 0 0 0 19.37
## 6 33.3 0 0 0 0 12.30
So sanh ty le gay xuong theo gioi tinh
library(DescTools)
options(scipen = 999)
Desc(hipfx ~ gender)
## -------------------------------------------------------------------------
## hipfx ~ gender
##
##
## Summary:
## n: 2'788, rows: 2, columns: 2
##
## Pearson's Chi-squared test (cont. adj):
## X-squared = 20.296, df = 1, p-value = 0.000006635
## Fisher's exact test p-value = 0.000003505
## McNemar's chi-squared = 725.09, df = 1, p-value < 0.00000000000000022
##
## estimate lwr.ci upr.ci'
##
## odds ratio 0.460 0.328 0.646
## rel. risk (col1) 0.774 0.709 0.846
## rel. risk (col2) 1.682 1.307 2.164
##
##
## Phi-Coefficient 0.087
## Contingency Coeff. 0.086
## Cramer's V 0.087
##
##
## gender Female Male Sum
## hipfx
##
## 0 freq 1'512 1'087 2'599
## perc 54.2% 39.0% 93.2%
## p.row 58.2% 41.8% .
## p.col 91.4% 95.9% .
##
## 1 freq 142 47 189
## perc 5.1% 1.7% 6.8%
## p.row 75.1% 24.9% .
## p.col 8.6% 4.1% .
##
## Sum freq 1'654 1'134 2'788
## perc 59.3% 40.7% 100.0%
## p.row . . .
## p.col . . .
##
##
## ----------
## ' 95% conf. level
Mo hinh hoi quy fx ~ gioi
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-7)
##
## Attaching package: 'rrcov'
## The following object is masked from 'package:DescTools':
##
## Cov
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
## Loading required package: nnet
m1 = glm(hipfx ~ gender, family = binomial, data = hip)
logistic.display(m1)
##
## Logistic regression predicting hipfx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## gender: Male vs Female 0.46 (0.33,0.65) < 0.001 < 0.001
##
## Log-likelihood = -679.98
## No. of observations = 2788
## AIC value = 1363.96
Bieu do tuong quan bmi the hipfx # BMI thap thi nguy co gay xuong cao
boxplot(bmi~ hipfx)
Loai outlier
hh = subset(hip, v2 < 2)
boxplot(hh$v2 ~ hh$hipfx, col = c("blue", "red"))
Tao bien moi v2 (phien giai gia tri mo hinh hoi quy)
library(epiDisplay)
hh$v2.n = hh$v2 / 0.1
m2 = glm(hipfx ~ v2.n, family = binomial, data = hh)
logistic.display(m2)
##
## Logistic regression predicting hipfx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## v2.n (cont. var.) 0.41 (0.36,0.47) < 0.001 < 0.001
##
## Log-likelihood = -525.8766
## No. of observations = 2722
## AIC value = 1055.7531
Danh gia AUC cua mo hinh fx ~ v2.n
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
##
## ci
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
hh$predicted = predict(m2, type = "response")
mm = roc(hh$hipfx, hh$predicted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(mm)
## Area under the curve: 0.8249
Ve bieu do ROC
plot(mm)
Danh gia mo hinh fx ~ bmi
bb = subset(hh, bmi > 0)
m3 = glm(hipfx ~ bmi, family = binomial, data = bb)
bb$predicted = predict(m3, type = "response")
mm1 = roc(bb$hipfx, bb$predicted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(mm1)
## Area under the curve: 0.6809
Ve bieu do
plot(mm1)