ob = read.csv("C:\\Users\\Admin\\Desktop\\TL tập huấn NCKH\\Obesity data.csv")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ age + gender + weight + height + pcfat + hypertension + diabetes, data = ob)
| Overall (N=1217) |
|
|---|---|
| age | |
| Mean (SD) | 47.2 (17.3) |
| Median [Min, Max] | 48.0 [13.0, 88.0] |
| gender | |
| F | 862 (70.8%) |
| M | 355 (29.2%) |
| weight | |
| Mean (SD) | 55.1 (9.40) |
| Median [Min, Max] | 54.0 [34.0, 95.0] |
| height | |
| Mean (SD) | 157 (7.98) |
| Median [Min, Max] | 155 [136, 185] |
| pcfat | |
| Mean (SD) | 31.6 (7.18) |
| Median [Min, Max] | 32.4 [9.20, 48.4] |
| hypertension | |
| Mean (SD) | 0.507 (0.500) |
| Median [Min, Max] | 1.00 [0, 1.00] |
| diabetes | |
| Mean (SD) | 0.111 (0.314) |
| Median [Min, Max] | 0 [0, 1.00] |
ob$hyper = as.factor(ob$hypertension)
ob$diab = as.factor(ob$diabetes)
table1(~ age + gender + weight + height + pcfat + hypertension + hyper + diabetes + diab, data = ob)
| Overall (N=1217) |
|
|---|---|
| age | |
| Mean (SD) | 47.2 (17.3) |
| Median [Min, Max] | 48.0 [13.0, 88.0] |
| gender | |
| F | 862 (70.8%) |
| M | 355 (29.2%) |
| weight | |
| Mean (SD) | 55.1 (9.40) |
| Median [Min, Max] | 54.0 [34.0, 95.0] |
| height | |
| Mean (SD) | 157 (7.98) |
| Median [Min, Max] | 155 [136, 185] |
| pcfat | |
| Mean (SD) | 31.6 (7.18) |
| Median [Min, Max] | 32.4 [9.20, 48.4] |
| hypertension | |
| Mean (SD) | 0.507 (0.500) |
| Median [Min, Max] | 1.00 [0, 1.00] |
| hyper | |
| 0 | 600 (49.3%) |
| 1 | 617 (50.7%) |
| diabetes | |
| Mean (SD) | 0.111 (0.314) |
| Median [Min, Max] | 0 [0, 1.00] |
| diab | |
| 0 | 1082 (88.9%) |
| 1 | 135 (11.1%) |
library(table1)
table1(~ age + weight + height + pcfat, data = ob, render.continuous = c(. = "Mean (SD)", . = "Median [Q1, Q3]"))
| Overall (N=1217) |
|
|---|---|
| age | |
| Mean (SD) | 47.2 (17.3) |
| Median [Q1, Q3] | 48.0 [35.0, 58.0] |
| weight | |
| Mean (SD) | 55.1 (9.40) |
| Median [Q1, Q3] | 54.0 [49.0, 61.0] |
| height | |
| Mean (SD) | 157 (7.98) |
| Median [Q1, Q3] | 155 [151, 162] |
| pcfat | |
| Mean (SD) | 31.6 (7.18) |
| Median [Q1, Q3] | 32.4 [27.0, 36.8] |
table1(~ age + weight + height + pcfat + hyper + diab | gender, data = ob)
| F (N=862) |
M (N=355) |
Overall (N=1217) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 48.6 (16.4) | 43.7 (18.8) | 47.2 (17.3) |
| Median [Min, Max] | 49.0 [14.0, 85.0] | 44.0 [13.0, 88.0] | 48.0 [13.0, 88.0] |
| weight | |||
| Mean (SD) | 52.3 (7.72) | 62.0 (9.59) | 55.1 (9.40) |
| Median [Min, Max] | 51.0 [34.0, 95.0] | 62.0 [38.0, 95.0] | 54.0 [34.0, 95.0] |
| height | |||
| Mean (SD) | 153 (5.55) | 165 (6.73) | 157 (7.98) |
| Median [Min, Max] | 153 [136, 170] | 165 [146, 185] | 155 [136, 185] |
| pcfat | |||
| Mean (SD) | 34.7 (5.19) | 24.2 (5.76) | 31.6 (7.18) |
| Median [Min, Max] | 34.7 [14.6, 48.4] | 24.6 [9.20, 39.0] | 32.4 [9.20, 48.4] |
| hyper | |||
| 0 | 430 (49.9%) | 170 (47.9%) | 600 (49.3%) |
| 1 | 432 (50.1%) | 185 (52.1%) | 617 (50.7%) |
| diab | |||
| 0 | 760 (88.2%) | 322 (90.7%) | 1082 (88.9%) |
| 1 | 102 (11.8%) | 33 (9.3%) | 135 (11.1%) |
library(compareGroups)
createTable(compareGroups(gender ~ age + weight + height + pcfat + hyper + diab, data = ob))
##
## --------Summary descriptives table by 'gender'---------
##
## ________________________________________
## F M p.overall
## N=862 N=355
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 48.6 (16.4) 43.7 (18.8) <0.001
## weight 52.3 (7.72) 62.0 (9.59) <0.001
## height 153 (5.55) 165 (6.73) <0.001
## pcfat 34.7 (5.19) 24.2 (5.76) <0.001
## hyper: 0.569
## 0 430 (49.9%) 170 (47.9%)
## 1 432 (50.1%) 185 (52.1%)
## diab: 0.238
## 0 760 (88.2%) 322 (90.7%)
## 1 102 (11.8%) 33 (9.30%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
A = c(14, 4, 10, 6, 3, 11, 12)
B = c(16, 17, 13, 12, 7, 16, 11, 8, 7)
wt = c(A, B)
group = c(rep("A", 7), rep("B", 9))
df = data.frame(wt, group)
dim(df)
## [1] 16 2
library(lessR)
##
## lessR 4.4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
Histogram(wt, data = df)
## >>> Note: wt is not in a data frame (table)
## >>> Note: wt is not in a data frame (table)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(wt, density=TRUE) # smoothed curve + histogram
## Plot(wt) # Violin/Box/Scatterplot (VBS) plot
##
## --- wt ---
##
## n miss mean sd min mdn max
## 16 0 10.44 4.29 3.00 11.00 17.00
##
##
## No (Box plot) outliers
##
##
## Bin Width: 2
## Number of Bins: 8
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -------------------------------------------------
## 2 > 4 3 2 0.12 2 0.12
## 4 > 6 5 1 0.06 3 0.19
## 6 > 8 7 3 0.19 6 0.38
## 8 > 10 9 1 0.06 7 0.44
## 10 > 12 11 4 0.25 11 0.69
## 12 > 14 13 2 0.12 13 0.81
## 14 > 16 15 2 0.12 15 0.94
## 16 > 18 17 1 0.06 16 1.00
##
shapiro.test(df$wt)
##
## Shapiro-Wilk normality test
##
## data: df$wt
## W = 0.96213, p-value = 0.7006
library(table1)
table1(~ wt | group, data = df, render.continuous = c(. = "Mean (SD)", . = "Median [Q1, Q3]"))
| A (N=7) |
B (N=9) |
Overall (N=16) |
|
|---|---|---|---|
| wt | |||
| Mean (SD) | 8.57 (4.24) | 11.9 (3.95) | 10.4 (4.29) |
| Median [Q1, Q3] | 10.0 [5.00, 11.5] | 12.0 [8.00, 16.0] | 11.0 [7.00, 13.3] |
t.test(A, B)
##
## Welch Two Sample t-test
##
## data: A and B
## t = -1.6, df = 12.554, p-value = 0.1345
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.813114 1.178194
## sample estimates:
## mean of x mean of y
## 8.571429 11.888889
t.test(wt ~ group, data = df)
##
## Welch Two Sample t-test
##
## data: wt by group
## t = -1.6, df = 12.554, p-value = 0.1345
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
## -7.813114 1.178194
## sample estimates:
## mean in group A mean in group B
## 8.571429 11.888889
library(simpleboot)
## Simple Bootstrap Routines (1.1-8)
library(boot)
b = two.boot(A, B, mean, R = 1000)
boot.ci(b)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = b)
##
## Intervals :
## Level Normal Basic
## 95% (-7.105, 0.491 ) (-7.238, 0.540 )
##
## Level Percentile BCa
## 95% (-7.175, 0.603 ) (-7.175, 0.610 )
## Calculations and Intervals on Original Scale
hist(b, breaks = 50)
Sử dụng ChatGPT PROMPT: Tôi có dữ liệu cho 2 nhóm như sau. Bạn có thể
viết mã R để phân tích sự khác biệt giữa hai nhóm dùng bootstrap? A =
c(14, 4, 10, 6, 3, 11, 12) B = c(16, 17, 13, 12, 7, 16, 11, 8, 7)
library(boot)
A <- c(14, 4, 10, 6, 3, 11, 12)
B <- c(16, 17, 13, 12, 7, 16, 11, 8, 7)
# Hàm trả về chênh lệch trung bình giữa hai nhóm
diff_mean <- function(data1, data2) {
mean(data1) - mean(data2)
}
obs_diff <- diff_mean(A, B)
obs_diff
## [1] -3.31746
library(boot)
# Gộp dữ liệu thành một danh sách để truyền vào boot
data_list <- list(A = A, B = B)
# Hàm bootstrap
boot_diff <- function(data, indices) {
A_boot <- sample(data$A, replace = TRUE)
B_boot <- sample(data$B, replace = TRUE)
return(mean(A_boot) - mean(B_boot))
}
# Chạy bootstrap 10,000 lần
set.seed(123)
boot_result <- boot(data = data_list, statistic = boot_diff, R = 10000)
# Tóm tắt kết quả bootstrap
boot_result
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = data_list, statistic = boot_diff, R = 10000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -4.333333 1.020943 1.90975
# Khoảng tin cậy 95%
boot.ci(boot_result, type = c("perc", "bca"))
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_result, type = c("perc", "bca"))
##
## Intervals :
## Level Percentile BCa
## 95% (-7.032, 0.397 ) (-9.306, -1.556 )
## Calculations and Intervals on Original Scale
hist(boot_result$t,
main = "Phân phối bootstrap của chênh lệch trung bình",
xlab = "Mean(A) - Mean(B)",
col = "lightblue", border = "gray")
abline(v = obs_diff, col = "red", lwd = 2)
boot_diff_median <- function(data, indices) {
A_boot <- sample(data$A, replace = TRUE)
B_boot <- sample(data$B, replace = TRUE)
return(median(A_boot) - median(B_boot))
}
boot_result_median <- boot(data = data_list, statistic = boot_diff_median, R = 10000)
boot.ci(boot_result_median, type = c("perc", "bca"))
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_result_median, type = c("perc", "bca"))
##
## Intervals :
## Level Percentile BCa
## 95% (-10, 3 ) (-13, -6 )
## Calculations and Intervals on Original Scale
## Warning : BCa Intervals used Extreme Quantiles
## Some BCa intervals may be unstable
# Hàm tiện ích để tạo chỉ số bootstrap riêng cho mỗi nhóm
boot_2groups <- function(A, B, R = 2000) {
nA <- length(A)
nB <- length(B)
boot_values <- numeric(R)
for (i in 1:R) {
idxA <- sample(1:nA, nA, replace = TRUE)
idxB <- sample(1:nB, nB, replace = TRUE)
boot_values[i] <- mean(A[idxA]) - mean(B[idxB])
}
return(boot_values)
}
# Chạy bootstrap
set.seed(123)
boot_values <- boot_2groups(A, B, R = 2000)
# Kết quả ước lượng
diff_estimate <- mean(A) - mean(B)
diff_estimate
## [1] -3.31746
# Tạo đối tượng "boot" để dùng hàm boot.ci
boot_obj <- boot(data = list(A = A, B = B),
statistic = function(data, i) {
# i ở đây là vector cho A và B (chúng ta tạo tách riêng)
idxA <- sample(1:length(data$A), length(data$A), replace = TRUE)
idxB <- sample(1:length(data$B), length(data$B), replace = TRUE)
mean(data$A[idxA]) - mean(data$B[idxB])
},
R = 2000)
boot_obj
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = list(A = A, B = B), statistic = function(data, i) {
## idxA <- sample(1:length(data$A), length(data$A), replace = TRUE)
## idxB <- sample(1:length(data$B), length(data$B), replace = TRUE)
## mean(data$A[idxA]) - mean(data$B[idxB])
## }, R = 2000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 0.1746032 -3.404722 1.901877
boot.ci(boot_obj, conf = 0.95, type = c("norm", "basic", "perc", "bca"))
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_obj, conf = 0.95, type = c("norm", "basic",
## "perc", "bca"))
##
## Intervals :
## Level Normal Basic
## 95% (-0.1483, 7.3069 ) (-0.0635, 7.3333 )
##
## Level Percentile BCa
## 95% (-6.9841, 0.4127 ) (-0.2647, 2.6032 )
## Calculations and Intervals on Original Scale
## Warning : BCa Intervals used Extreme Quantiles
## Some BCa intervals may be unstable