#Việc 1. Đánh giá liên quan giữa trình độ học vấn và nhận xét về lí do TT Clinton tái đắc cử 1996
df = matrix(c(91, 104, 235, 39, 73, 48, 18, 31, 161), nrow = 3, byrow = TRUE)
head(df)
## [,1] [,2] [,3]
## [1,] 91 104 235
## [2,] 39 73 48
## [3,] 18 31 161
#1.2 Thực hiện phép kiểm ki bình phương
chisq.test(df)
##
## Pearson's Chi-squared test
##
## data: df
## X-squared = 86.023, df = 4, p-value < 2.2e-16
df= read.csv("D:\\Deskop\\Học R tại CT\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\Diabetes data.csv")
head(df)
## id age sex height weight systBP diastBP brother parents hypertension bmi
## 1 1 54 Female 150 68.5 150 80 0 0 1 30.44
## 2 2 62 Female 165 75.5 170 80 0 0 1 27.73
## 3 3 49 Female 152 71.0 130 90 1 1 1 30.73
## 4 4 45 Female 154 58.0 110 90 0 1 1 24.46
## 5 5 54 Female 151 54.0 100 80 1 1 0 23.68
## 6 6 46 Female 156 76.0 120 80 0 1 0 31.23
## whr nrisks group
## 1 0.85 3 Normal
## 2 1.00 3 IFG
## 3 0.82 4 Normal
## 4 0.81 3 Normal
## 5 0.80 3 Normal
## 6 0.92 3 Normal
table(df$group)
##
## DM IFG Normal
## 242 243 2680
df$diab = ifelse(df$group == "DM", 1, 0)
head(df)
## id age sex height weight systBP diastBP brother parents hypertension bmi
## 1 1 54 Female 150 68.5 150 80 0 0 1 30.44
## 2 2 62 Female 165 75.5 170 80 0 0 1 27.73
## 3 3 49 Female 152 71.0 130 90 1 1 1 30.73
## 4 4 45 Female 154 58.0 110 90 0 1 1 24.46
## 5 5 54 Female 151 54.0 100 80 1 1 0 23.68
## 6 6 46 Female 156 76.0 120 80 0 1 0 31.23
## whr nrisks group diab
## 1 0.85 3 Normal 0
## 2 1.00 3 IFG 0
## 3 0.82 4 Normal 0
## 4 0.81 3 Normal 0
## 5 0.80 3 Normal 0
## 6 0.92 3 Normal 0
m.1 = glm(diab~ sex, family = binomial, data=df)
summary(m.1)
##
## Call:
## glm(formula = diab ~ sex, family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.61745 0.08544 -30.637 < 2e-16 ***
## sexMale 0.35898 0.13757 2.609 0.00907 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1709.4 on 3164 degrees of freedom
## Residual deviance: 1702.7 on 3163 degrees of freedom
## AIC: 1706.7
##
## Number of Fisher Scoring iterations: 5
epiDisplay::logistic.display(m.1)
##
## Logistic regression predicting diab
##
## OR(95%CI) P(Wald's test) P(LR-test)
## sex (cont. var.) 1.43 (1.09,1.88) 0.009 0.01
##
## Log-likelihood = -851.3575
## No. of observations = 3165
## AIC value = 1706.715
m.0 =lessR::Logit(diab ~ sex, brief = TRUE, data = df)
##
## >>> Note: sex is not a numeric variable.
## Indicator variables are created and analyzed.
##
## Response Variable: diab
## Predictor Variable 1: sexMale
##
## Number of cases (rows) of data: 3165
## Number of cases retained for analysis: 3165
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of diab for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -2.6174 0.0854 -30.637 0.000 -2.7849 -2.4500
## sexMale 0.3590 0.1376 2.609 0.009 0.0893 0.6286
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.0730 0.0617 0.0863
## sexMale 1.4319 1.0935 1.8750
##
##
## -- Model Fit
##
## Null deviance: 1709.356 on 3164 degrees of freedom
## Residual deviance: 1702.715 on 3163 degrees of freedom
##
## AIC: 1706.715
##
## Number of iterations to convergence: 5
df$pred = predict(m.1, type = c("response"))
head(df)
## id age sex height weight systBP diastBP brother parents hypertension bmi
## 1 1 54 Female 150 68.5 150 80 0 0 1 30.44
## 2 2 62 Female 165 75.5 170 80 0 0 1 27.73
## 3 3 49 Female 152 71.0 130 90 1 1 1 30.73
## 4 4 45 Female 154 58.0 110 90 0 1 1 24.46
## 5 5 54 Female 151 54.0 100 80 1 1 0 23.68
## 6 6 46 Female 156 76.0 120 80 0 1 0 31.23
## whr nrisks group diab pred
## 1 0.85 3 Normal 0 0.06802406
## 2 1.00 3 IFG 0 0.06802406
## 3 0.82 4 Normal 0 0.06802406
## 4 0.81 3 Normal 0 0.06802406
## 5 0.80 3 Normal 0 0.06802406
## 6 0.92 3 Normal 0 0.06802406
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
g =roc(diab ~pred, data = df); plot(g)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
bw = read.csv ("D:\\Deskop\\Học R tại CT\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\birthwt.csv")
dim(bw)
## [1] 189 11
bw1 = bw[, c("low", "age", "lwt", "race", "smoke", "ptl", "ht", "ui", "ftv")]
head(bw1)
## low age lwt race smoke ptl ht ui ftv
## 1 0 19 182 2 0 0 0 1 0
## 2 0 33 155 3 0 0 0 0 3
## 3 0 20 105 1 1 0 0 0 1
## 4 0 21 108 1 1 0 0 1 2
## 5 0 18 107 1 1 0 0 1 0
## 6 0 21 124 3 0 0 0 0 0
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-6)
xvars = bw [,c( "age", "lwt", "race", "smoke", "ptl", "ht", "ui", "ftv")]
yvars = bw[,c("low")]
m.bma= bic.glm (xvars,yvars, strict=FALSE, OR=20, glm.family = binomial)
summary (m.bma)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvars, glm.family = binomial, strict = FALSE, OR = 20)
##
##
## 84 models were selected
## Best 5 models (cumulative posterior probability = 0.2531 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 -0.390128 1.575728 1.45068 1.09291 -2.32488
## age 10.4 -0.004815 0.018070 . . .
## lwt 54.8 -0.008473 0.009253 -0.01865 -0.01707 .
## race 44.3 0.212462 0.280368 . . 0.55898
## smoke 52.1 0.484523 0.552668 . . 1.11668
## ptl 41.2 0.291512 0.410590 . 0.72560 .
## ht 59.7 1.011382 0.999519 1.85551 1.85604 .
## ui 30.0 0.263111 0.470489 . . .
## ftv 2.0 -0.001015 0.024588 . . .
##
## nVar 2 3 2
## BIC -753.82285 -753.75940 -753.62525
## post prob 0.058 0.056 0.052
## model 4 model 5
## Intercept -0.35754 1.06795
## age . .
## lwt -0.01535 -0.01692
## race 0.48955 .
## smoke 1.08002 .
## ptl . .
## ht 1.74427 1.96157
## ui . 0.93000
## ftv . .
##
## nVar 4 3
## BIC -753.44086 -753.11035
## post prob 0.048 0.040
imageplot.bma(m.bma)