Việc 2. Phân tích mô tả:

# Load the birth-weight data set and attach table1 for descriptive tables.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact folder exists.
bw <- read.csv(
  "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/birthwt.csv"
)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Overall descriptive summary of age, lwt and bwt (no stratification).
table1(~ age + lwt + bwt, data = bw)
Overall
(N=189)
age
Mean (SD) 23.2 (5.30)
Median [Min, Max] 23.0 [14.0, 45.0]
lwt
Mean (SD) 130 (30.6)
Median [Min, Max] 121 [80.0, 250]
bwt
Mean (SD) 2940 (729)
Median [Min, Max] 2980 [709, 4990]
# Same summary stratified by `low`.
# NOTE(review): smoke and race are passed as-is here, and the output below
# summarises them with mean/SD like continuous variables. `low` is not a
# factor either, which is what the warning about terms right of '|' refers to.
table1(~ age + lwt + smoke + race + bwt | low, data = bw)
## Warning in table1.formula(~age + lwt + smoke + race + bwt | low, data = bw):
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
0
(N=130)
1
(N=59)
Overall
(N=189)
age
Mean (SD) 23.7 (5.58) 22.3 (4.51) 23.2 (5.30)
Median [Min, Max] 23.0 [14.0, 45.0] 22.0 [14.0, 34.0] 23.0 [14.0, 45.0]
lwt
Mean (SD) 133 (31.7) 122 (26.6) 130 (30.6)
Median [Min, Max] 124 [85.0, 250] 120 [80.0, 200] 121 [80.0, 250]
smoke
Mean (SD) 0.338 (0.475) 0.508 (0.504) 0.392 (0.489)
Median [Min, Max] 0 [0, 1.00] 1.00 [0, 1.00] 0 [0, 1.00]
race
Mean (SD) 1.76 (0.913) 2.03 (0.909) 1.85 (0.918)
Median [Min, Max] 1.00 [1.00, 3.00] 2.00 [1.00, 3.00] 1.00 [1.00, 3.00]
bwt
Mean (SD) 3330 (478) 2100 (391) 2940 (729)
Median [Min, Max] 3270 [2520, 4990] 2210 [709, 2500] 2980 [709, 4990]
# Wrap smoke and race in as.factor() so they are tabulated as counts (%)
# instead of mean/SD.
# NOTE(review): `low` is still not a factor, so the same warning is raised;
# converting it to a labelled factor beforehand would presumably silence it
# and give the columns meaningful headers.
table1(
  ~ age + lwt + as.factor(smoke) + as.factor(race) + bwt | low,
  data = bw
)
## Warning in table1.formula(~age + lwt + as.factor(smoke) + as.factor(race) + :
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
0
(N=130)
1
(N=59)
Overall
(N=189)
age
Mean (SD) 23.7 (5.58) 22.3 (4.51) 23.2 (5.30)
Median [Min, Max] 23.0 [14.0, 45.0] 22.0 [14.0, 34.0] 23.0 [14.0, 45.0]
lwt
Mean (SD) 133 (31.7) 122 (26.6) 130 (30.6)
Median [Min, Max] 124 [85.0, 250] 120 [80.0, 200] 121 [80.0, 250]
as.factor(smoke)
0 86 (66.2%) 29 (49.2%) 115 (60.8%)
1 44 (33.8%) 30 (50.8%) 74 (39.2%)
as.factor(race)
1 73 (56.2%) 23 (39.0%) 96 (50.8%)
2 15 (11.5%) 11 (18.6%) 26 (13.8%)
3 42 (32.3%) 25 (42.4%) 67 (35.4%)
bwt
Mean (SD) 3330 (478) 2100 (391) 2940 (729)
Median [Min, Max] 3270 [2520, 4990] 2210 [709, 2500] 2980 [709, 4990]

Xem lại: table1(~age+lwt+as.factor(smoke)+as.factor(race)+bwt|low, data=bw, render.continuous=c(.="Mean (SD)", .="Median [Q1 - Q3]", .="Median [min,max]"))

Việc 3. So sánh mật độ xương tại cổ xương đùi giữa nam và nữ

# Load the bone data set and attach lessR (Histogram, ttest).
# NOTE(review): the path is absolute and machine-specific.
df <- read.csv(
  "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/Bone data.csv"
)
library(lessR)
## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
## 
##     label
## The following object is masked from 'package:base':
## 
##     sort_by
# Histogram and summary statistics of fnbmd before comparing groups.
Histogram(fnbmd, data = df)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(fnbmd, density=TRUE)  # smoothed curve + histogram 
## Plot(fnbmd)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- fnbmd --- 
##  
##        n    miss      mean        sd       min       mdn       max 
##      2122      40     0.829     0.155     0.280     0.820     1.510 
##  
## 
##   
## --- Outliers ---     from the box plot: 33 
##  
## Small      Large 
## -----      ----- 
##  0.3      1.5 
##  0.3      1.5 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.3 
##  0.4      1.3 
##  0.4      1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.2 
##           1.2 
##           1.2 
## 
## + 15 more outliers 
## 
## 
## Bin Width: 0.1 
## Number of Bins: 14 
##  
##        Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## --------------------------------------------------- 
##  0.2 > 0.3    0.25      1    0.00        1     0.00 
##  0.3 > 0.4    0.35      9    0.00       10     0.00 
##  0.4 > 0.5    0.45     15    0.01       25     0.01 
##  0.5 > 0.6    0.55    103    0.05      128     0.06 
##  0.6 > 0.7    0.65    306    0.14      434     0.20 
##  0.7 > 0.8    0.75    522    0.24      956     0.44 
##  0.8 > 0.9    0.85    534    0.25     1490     0.69 
##  0.9 > 1.0    0.95    371    0.17     1861     0.86 
##  1.0 > 1.1    1.05    183    0.08     2044     0.95 
##  1.1 > 1.2    1.15     48    0.02     2092     0.97 
##  1.2 > 1.3    1.25     21    0.01     2113     0.98 
##  1.3 > 1.4    1.35      6    0.00     2119     0.98 
##  1.4 > 1.5    1.45      2    0.00     2121     0.98 
##  1.5 > 1.6    1.55      1    0.00     2122     0.98 
## 
# Two-sample t-test of fnbmd between the two sex groups; the output reports
# both the equal-variance and unequal-variance versions.
ttest(fnbmd ~ sex, data = df)
## 
## Compare fnbmd across sex with levels Male and Female 
## Grouping Variable:  sex
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for sex Male:  n.miss = 23,  n = 822,  mean = 0.910,  sd = 0.153
## fnbmd for sex Female:  n.miss = 17,  n = 1300,  mean = 0.778,  sd = 0.132
## 
## Mean Difference of fnbmd:  0.132
## 
## Weighted Average Standard Deviation:   0.141 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.018 = 1.336,  df = 821;1299,  p-value = 0.000
## Levene's test, Brown-Forsythe:  t = 3.449,  df = 2120,  p-value = 0.001
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## t-cutoff for 95% range of variation: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 21.080,  df = 2120,  p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.012
## 95% Confidence Interval for Mean Difference:  0.120 to 0.144
## 
## 
## --- Do not assume equal population variances of fnbmd for each sex 
## 
## t-cutoff: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t = 20.407,  df = 1560.981, p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.013
## 95% Confidence Interval for Mean Difference:  0.119 to 0.145
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.939
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034

## So sánh mật độ xương (fnbmd) giữa nhóm hút thuốc và không hút thuốc ở nam giới

# Keep only the male participants, then compare fnbmd between the two
# smoking groups (0 and 1) within that subset.
men <- subset(df, sex == "Male")
ttest(fnbmd ~ smoking, data = men)
## 
## Compare fnbmd across smoking with levels 0 and 1 
## Grouping Variable:  smoking
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for smoking 0:  n.miss = 8,  n = 312,  mean = 0.932,  sd = 0.151
## fnbmd for smoking 1:  n.miss = 15,  n = 510,  mean = 0.896,  sd = 0.153
## 
## Mean Difference of fnbmd:  0.036
## 
## Weighted Average Standard Deviation:   0.152 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group 0: Sample mean assumed normal because n > 30, so no test needed.
## Group 1: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.023 = 1.028,  df = 509;311,  p-value = 0.795
## Levene's test, Brown-Forsythe:  t = -1.122,  df = 820,  p-value = 0.262
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each smoking 
## 
## t-cutoff for 95% range of variation: tcut =  1.963 
## Standard Error of Mean Difference: SE =  0.011 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 3.316,  df = 820,  p-value = 0.001
## 
## Margin of Error for 95% Confidence Level:  0.021
## 95% Confidence Interval for Mean Difference:  0.015 to 0.058
## 
## 
## --- Do not assume equal population variances of fnbmd for each smoking 
## 
## t-cutoff: tcut =  1.964 
## Standard Error of Mean Difference: SE =  0.011 
## 
## Hypothesis Test of 0 Mean Diff:  t = 3.327,  df = 664.491, p-value = 0.001
## 
## Margin of Error for 95% Confidence Level:  0.021
## 95% Confidence Interval for Mean Difference:  0.015 to 0.058
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each smoking 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.238
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for smoking 0: 0.043
## Density bandwidth for smoking 1: 0.050

## Mô tả lại mật độ xương và smoking theo giới

# Describe fnbmd and smoking (as a categorical variable) by sex.
table1(~ fnbmd + as.factor(smoking) | sex, data = df)
Female
(N=1317)
Male
(N=845)
Overall
(N=2162)
fnbmd
Mean (SD) 0.778 (0.132) 0.910 (0.153) 0.829 (0.155)
Median [Min, Max] 0.770 [0.280, 1.31] 0.900 [0.340, 1.51] 0.820 [0.280, 1.51]
Missing 17 (1.3%) 23 (2.7%) 40 (1.9%)
as.factor(smoking)
0 923 (70.1%) 320 (37.9%) 1243 (57.5%)
1 394 (29.9%) 525 (62.1%) 919 (42.5%)

Việc 4. Đánh giá ảnh hưởng của café lên RER (respiratory exchange ratio – tỉ số trao đổi hô hấp)

# RER measurements for the two groups (nine values each).
placebo <- c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee <- c(96, 99, 94, 89, 96, 93, 88, 105, 88)

chạy Bootstrap

# Bootstrap tooling: boot provides boot.ci(), simpleboot provides two.boot().
library(boot)
library(simpleboot)
## Simple Bootstrap Routines (1.1-8)
# Bootstrap the difference in means between placebo and coffee using
# two.boot() with 500 resamples.
# NOTE(review): no set.seed() call is visible before this line, so the
# resampled values presumably change from run to run.
b <- two.boot(placebo, coffee, mean, R = 500)
# Confidence intervals for the bootstrapped difference. The BCa interval
# (bias-corrected and accelerated) is the one usually reported; the result is
# more convincing when the normal, basic and percentile intervals point in
# the same direction.
boot.ci(b)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 500 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = b)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   ( 0.523, 12.042 )   ( 0.444, 11.444 )  
## 
## Level     Percentile            BCa          
## 95%   ( 1.222, 12.222 )   ( 1.287, 12.271 )  
## Calculations and Intervals on Original Scale
# Histogram of the bootstrap replicates; note the argument is spelled
# `breaks` (with an "s"). The x-axis is the difference between the two groups.
hist(b, breaks = 50)

# Median of the bootstrap replicates; b$t holds the resampled differences.
median(b$t)
## [1] 6.166667
# First quartile, median and third quartile of the bootstrap replicates.
quantile(b$t, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
##      25%      50%      75% 
## 4.305556 6.166667 8.444444
# Mean of the bootstrap replicates.
mean(b$t)
## [1] 6.384222

Việc 5. So sánh tỉ lệ gãy xương (fx) giữa nam và nữ (sex)

# Counts and percentages of fx (treated as categorical) within each sex.
table1(~ as.factor(fx) | sex, data = df)
Female
(N=1317)
Male
(N=845)
Overall
(N=2162)
as.factor(fx)
0 916 (69.6%) 701 (83.0%) 1617 (74.8%)
1 401 (30.4%) 144 (17.0%) 545 (25.2%)
# Pearson chi-squared test of association between fx and sex; the output
# below shows that Yates' continuity correction was applied.
chisq.test(df$fx, df$sex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  df$fx and df$sex
## X-squared = 48.363, df = 1, p-value = 0.000000000003542