##1. Lấy data birthwt
n3=read.csv("D:\\OneDrive\\Documents\\HPK1 2018\\NVPS2025\\Viet bao khoa hoc paper\\R\\DU LIEU THUC HANH\\birthwt.csv")
##2. đọc packages Table1, Mô tả đặc điểm tuổi mẹ, cân nặng mẹ và cân nặng con
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ age + lwt + bwt, data = n3)
| Overall (N=189) |
|
|---|---|
| age | |
| Mean (SD) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] |
| lwt | |
| Mean (SD) | 130 (30.6) |
| Median [Min, Max] | 121 [80.0, 250] |
| bwt | |
| Mean (SD) | 2940 (729) |
| Median [Min, Max] | 2980 [709, 4990] |
##3. Mô tả đặc điểm tuổi mẹ, cân nặng mẹ, tình trạng hút thuốc trong thai kỳ, chủng tộc và cân nặng con theo tình trạng trẻ sanh thiếu cân
n3$race.c = as.character(factor(n3$race, levels = c(1, 2, 3), labels = c("White", "Black", "Other")))
n3$smoke.c = as.character(factor(n3$smoke, levels = c(0, 1), labels = c("No", "Yes")))
n3$low.c = as.character(factor(n3$low, levels = c(0, 1), labels = c("No LBW", "Low birthweight")))
table1(~ age + lwt + smoke.c + race.c + bwt | low.c, data = n3)
| Low birthweight (N=59) |
No LBW (N=130) |
Overall (N=189) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 22.3 (4.51) | 23.7 (5.58) | 23.2 (5.30) |
| Median [Min, Max] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] | 23.0 [14.0, 45.0] |
| lwt | |||
| Mean (SD) | 122 (26.6) | 133 (31.7) | 130 (30.6) |
| Median [Min, Max] | 120 [80.0, 200] | 124 [85.0, 250] | 121 [80.0, 250] |
| smoke.c | |||
| No | 29 (49.2%) | 86 (66.2%) | 115 (60.8%) |
| Yes | 30 (50.8%) | 44 (33.8%) | 74 (39.2%) |
| race.c | |||
| Black | 11 (18.6%) | 15 (11.5%) | 26 (13.8%) |
| Other | 25 (42.4%) | 42 (32.3%) | 67 (35.4%) |
| White | 23 (39.0%) | 73 (56.2%) | 96 (50.8%) |
| bwt | |||
| Mean (SD) | 2100 (391) | 3330 (478) | 2940 (729) |
| Median [Min, Max] | 2210 [709, 2500] | 3270 [2520, 4990] | 2980 [709, 4990] |
##4.Sử dụng gói lệnh ‘table1’ để mô tả đặc điểm tuổi của mẹ (age), cân nặng của mẹ (lwt), tình trạng hút thuốc trong thai kỳ (smoke), chủng tộc (race), và cân nặng của con (bwt) theo tình trạng trẻ thiếu cân (low)
# 2. Chuyển các biến thành factor với nhãn trực quan
n3$low <- factor(n3$low, levels = c(0,1), labels = c("Normal", "Low"))
n3$smoke <- factor(n3$smoke, levels = c(0,1), labels = c("No", "Yes"))
n3$race <- factor(n3$race, levels = c(1,2,3),
labels = c("Caucasian", "African American", "Other"))
# 3. Đặt nhãn cho biến (table1 sẽ dùng nhãn này thay tên cột)
label(n3$age) <- "Mother's Age (years)"
label(n3$lwt) <- "Mother's Weight (lbs)"
label(n3$smoke) <- "Smoking During Pregnancy"
label(n3$race) <- "Mother's Race"
label(n3$bwt) <- "Infant Birth Weight (g)"
# 4. Tạo bảng mô tả
table1(~ age + lwt + smoke + race + bwt | low,
data = n3,
caption = "Descriptive characteristics by birthweight status",
render.missing = FALSE)
| Normal (N=130) |
Low (N=59) |
Overall (N=189) |
|
|---|---|---|---|
| Mother's Age (years) | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| Mother's Weight (lbs) | |||
| Mean (SD) | 133 (31.7) | 122 (26.6) | 130 (30.6) |
| Median [Min, Max] | 124 [85.0, 250] | 120 [80.0, 200] | 121 [80.0, 250] |
| Smoking During Pregnancy | |||
| No | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
| Yes | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
| Mother's Race | |||
| Caucasian | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
| African American | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
| Other | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
| Infant Birth Weight (g) | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
##5. Vẽ histogram đánh giá phân bố mật độ xương
xu = read.csv("D:\\OneDrive\\Documents\\HPK1 2018\\NVPS2025\\Viet bao khoa hoc paper\\R\\DU LIEU THUC HANH\\Bone data.csv")
dim(xu)
## [1] 2162 9
library(lessR)
##
## lessR 4.4.2 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
Histogram(fnbmd, fill = "blue", xlab = "Mật độ xương (g/cm2)", ylab = "Frequency", data = xu)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(fnbmd, density=TRUE) # smoothed curve + histogram
## Plot(fnbmd) # Violin/Box/Scatterplot (VBS) plot
##
## --- fnbmd ---
##
## n miss mean sd min mdn max
## 2122 40 0.829 0.155 0.280 0.820 1.510
##
##
##
## --- Outliers --- from the box plot: 33
##
## Small Large
## ----- -----
## 0.3 1.5
## 0.3 1.5
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.3
## 0.4 1.3
## 0.4 1.3
## 1.3
## 1.3
## 1.3
## 1.3
## 1.2
## 1.2
## 1.2
##
## + 15 more outliers
##
##
## Bin Width: 0.1
## Number of Bins: 14
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## ---------------------------------------------------
## 0.2 > 0.3 0.25 1 0.00 1 0.00
## 0.3 > 0.4 0.35 9 0.00 10 0.00
## 0.4 > 0.5 0.45 15 0.01 25 0.01
## 0.5 > 0.6 0.55 103 0.05 128 0.06
## 0.6 > 0.7 0.65 306 0.14 434 0.20
## 0.7 > 0.8 0.75 522 0.24 956 0.44
## 0.8 > 0.9 0.85 534 0.25 1490 0.69
## 0.9 > 1.0 0.95 371 0.17 1861 0.86
## 1.0 > 1.1 1.05 183 0.08 2044 0.95
## 1.1 > 1.2 1.15 48 0.02 2092 0.97
## 1.2 > 1.3 1.25 21 0.01 2113 0.98
## 1.3 > 1.4 1.35 6 0.00 2119 0.98
## 1.4 > 1.5 1.45 2 0.00 2121 0.98
## 1.5 > 1.6 1.55 1 0.00 2122 0.98
##
##6. So sánh mật độ xương cổ xương đùi giữa namvà nữ ữfnbmd: mật độ xương
library(table1)
table1(~ fnbmd | sex, data = xu)
| Female (N=1317) |
Male (N=845) |
Overall (N=2162) |
|
|---|---|---|---|
| fnbmd | |||
| Mean (SD) | 0.778 (0.132) | 0.910 (0.153) | 0.829 (0.155) |
| Median [Min, Max] | 0.770 [0.280, 1.31] | 0.900 [0.340, 1.51] | 0.820 [0.280, 1.51] |
| Missing | 17 (1.3%) | 23 (2.7%) | 40 (1.9%) |
t.test(fnbmd ~ sex, data = xu)
##
## Welch Two Sample t-test
##
## data: fnbmd by sex
## t = -20.407, df = 1561, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
## -0.1448770 -0.1194686
## sample estimates:
## mean in group Female mean in group Male
## 0.7775231 0.9096959
##7.T-Test
library(lessR)
ttest(fnbmd ~ sex, data = xu)
##
## Compare fnbmd across sex with levels Male and Female
## Grouping Variable: sex
## Response Variable: fnbmd
##
##
## ------ Describe ------
##
## fnbmd for sex Male: n.miss = 23, n = 822, mean = 0.910, sd = 0.153
## fnbmd for sex Female: n.miss = 17, n = 1300, mean = 0.778, sd = 0.132
##
## Mean Difference of fnbmd: 0.132
##
## Weighted Average Standard Deviation: 0.141
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
##
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test: F = 0.023/0.018 = 1.336, df = 821;1299, p-value = 0.000
## Levene's test, Brown-Forsythe: t = 3.449, df = 2120, p-value = 0.001
##
##
## ------ Infer ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## t-cutoff for 95% range of variation: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t-value = 21.080, df = 2120, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.012
## 95% Confidence Interval for Mean Difference: 0.120 to 0.144
##
##
## --- Do not assume equal population variances of fnbmd for each sex
##
## t-cutoff: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t = 20.407, df = 1560.981, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.013
## 95% Confidence Interval for Mean Difference: 0.119 to 0.145
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## Standardized Mean Difference of fnbmd, Cohen's d: 0.939
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034
##8.Đánh giá ảnh hưởng của cafe lên RER, phân tích sự khác biệt
placebo = c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee = c(96, 99, 94, 89, 96, 93, 88, 105, 88)
###8.1. Cài pack simpleboot ###8.2. Chạy bootstrap phân tích
library(simpleboot)
## Simple Bootstrap Routines (1.1-8)
library(boot)
b = two.boot(placebo, coffee, mean, R = 500)
boot.ci(b)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 500 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = b)
##
## Intervals :
## Level Normal Basic
## 95% ( 0.639, 12.122 ) ( 0.563, 11.889 )
##
## Level Percentile BCa
## 95% ( 0.778, 12.104 ) ( 0.867, 12.519 )
## Calculations and Intervals on Original Scale
###8.3. vẽ histogram
hist(b, breaks = 50)
### chat GPT # 1. Khởi tạo dữ liệu placebo <- c(105, 119, 100, 97,
96, 101, 94, 95, 98) coffee <- c( 96, 99, 94, 89, 96, 93, 88, 105,
88)
diff_mean <- function(data, indices) { n <- length(data) / 2 x <- data[indices] mean(x[1:n]) - mean(x[(n+1):(2*n)]) }
d_all <- c(placebo, coffee)
library(boot) set.seed(123) # để kết quả có thể tái lập boot_res <- boot(data = d_all, statistic = diff_mean, R = 10000)
boot_res
ORDINARY NONPARAMETRIC BOOTSTRAP
Call: boot(data = d_all, statistic = diff_mean, R = 10000)
Bootstrap Statistics : original bias std. error t1* 6.333333 -6.341222 3.341927 # 6. Khoảng tin cậy 95% boot.ci(boot_res, type = c(“perc”, “bca”)) BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS Based on 10000 bootstrap replicates
CALL : boot.ci(boot.out = boot_res, type = c(“perc”, “bca”))
Intervals : Level Percentile BCa
95% (-6.556, 6.556 ) ( 6.111, 11.556 )
Calculations and Intervals on Original Scale Warning : BCa Intervals
used Extreme Quantiles Some BCa intervals may be unstable
plot(boot_res) ###
##9.So sánh 2 nhóm - biến phân loại ###9.1.Việc 5. So sánh tỉ lệ gãy xương giữa nam và nữ
table1(~ as.factor(fx) | sex, data = xu)
| Female (N=1317) |
Male (N=845) |
Overall (N=2162) |
|
|---|---|---|---|
| as.factor(fx) | |||
| 0 | 916 (69.6%) | 701 (83.0%) | 1617 (74.8%) |
| 1 | 401 (30.4%) | 144 (17.0%) | 545 (25.2%) |
###9.2.ki bình phương chi sq test
chisq.test(xu$fx, xu$sex)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: xu$fx and xu$sex
## X-squared = 48.363, df = 1, p-value = 0.000000000003542
##Kết thúc