Việc 2. Phân tích mô tả:

# Load the birth-weight practice data set from the local course folder.
bw <- read.csv(
  file = "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/birthwt.csv"
)
library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

# Overall (unstratified) descriptive summary of age, lwt and bwt.
table1(~ age + lwt + bwt, data = bw)

	Overall (N=189)
age
Mean (SD)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]
lwt
Mean (SD)	130 (30.6)
Median [Min, Max]	121 [80.0, 250]
bwt
Mean (SD)	2940 (729)
Median [Min, Max]	2980 [709, 4990]

# Same summary, now split into columns by `low`. smoke and race are passed
# as-is, so they are summarised like continuous variables (mean/SD,
# median/range) in this call.
table1(~ age + lwt + smoke + race + bwt | low, data = bw)

## Warning in table1.formula(~age + lwt + smoke + race + bwt | low, data = bw):
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.

	0 (N=130)	1 (N=59)	Overall (N=189)
age
Mean (SD)	23.7 (5.58)	22.3 (4.51)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]	22.0 [14.0, 34.0]	23.0 [14.0, 45.0]
lwt
Mean (SD)	133 (31.7)	122 (26.6)	130 (30.6)
Median [Min, Max]	124 [85.0, 250]	120 [80.0, 200]	121 [80.0, 250]
smoke
Mean (SD)	0.338 (0.475)	0.508 (0.504)	0.392 (0.489)
Median [Min, Max]	0 [0, 1.00]	1.00 [0, 1.00]	0 [0, 1.00]
race
Mean (SD)	1.76 (0.913)	2.03 (0.909)	1.85 (0.918)
Median [Min, Max]	1.00 [1.00, 3.00]	2.00 [1.00, 3.00]	1.00 [1.00, 3.00]
bwt
Mean (SD)	3330 (478)	2100 (391)	2940 (729)
Median [Min, Max]	3270 [2520, 4990]	2210 [709, 2500]	2980 [709, 4990]

# Stratified summary with smoke and race converted to factors, so they are
# reported as counts and percentages per level.
# NOTE(review): `low` itself is still not a factor, which is what the table1
# warning is about; wrapping it as as.factor(low) should presumably silence
# the warning -- confirm before changing.
table1(
  ~ age + lwt + as.factor(smoke) + as.factor(race) + bwt | low,
  data = bw
)

## Warning in table1.formula(~age + lwt + as.factor(smoke) + as.factor(race) + :
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.

	0 (N=130)	1 (N=59)	Overall (N=189)
age
Mean (SD)	23.7 (5.58)	22.3 (4.51)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]	22.0 [14.0, 34.0]	23.0 [14.0, 45.0]
lwt
Mean (SD)	133 (31.7)	122 (26.6)	130 (30.6)
Median [Min, Max]	124 [85.0, 250]	120 [80.0, 200]	121 [80.0, 250]
as.factor(smoke)
0	86 (66.2%)	29 (49.2%)	115 (60.8%)
1	44 (33.8%)	30 (50.8%)	74 (39.2%)
as.factor(race)
1	73 (56.2%)	23 (39.0%)	96 (50.8%)
2	15 (11.5%)	11 (18.6%)	26 (13.8%)
3	42 (32.3%)	25 (42.4%)	67 (35.4%)
bwt
Mean (SD)	3330 (478)	2100 (391)	2940 (729)
Median [Min, Max]	3270 [2520, 4990]	2210 [709, 2500]	2980 [709, 4990]

Xem lại: table1(~age+lwt+as.factor(smoke)+as.factor(race)+bwt|low,data=bw, render.continuous = c(.="Mean (SD)",.="Median [Q1 - Q3]",.="Median [min,max]"))

Việc 3. So sánh mật độ xương tại cổ xương đùi giữa nam và nữ

# Load the bone-density practice data set from the local course folder.
df <- read.csv(
  file = "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/Bone data.csv"
)

library(lessR)

## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()

## 
## Attaching package: 'lessR'

## The following object is masked from 'package:table1':
## 
##     label

## The following object is masked from 'package:base':
## 
##     sort_by

# Histogram (with lessR's text summary) of fnbmd from df.
Histogram(fnbmd, data = df)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(fnbmd, density=TRUE)  # smoothed curve + histogram 
## Plot(fnbmd)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- fnbmd --- 
##  
##        n    miss      mean        sd       min       mdn       max 
##      2122      40     0.829     0.155     0.280     0.820     1.510 
##  
## 
##   
## --- Outliers ---     from the box plot: 33 
##  
## Small      Large 
## -----      ----- 
##  0.3      1.5 
##  0.3      1.5 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.3 
##  0.4      1.3 
##  0.4      1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.2 
##           1.2 
##           1.2 
## 
## + 15 more outliers 
## 
## 
## Bin Width: 0.1 
## Number of Bins: 14 
##  
##        Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## --------------------------------------------------- 
##  0.2 > 0.3    0.25      1    0.00        1     0.00 
##  0.3 > 0.4    0.35      9    0.00       10     0.00 
##  0.4 > 0.5    0.45     15    0.01       25     0.01 
##  0.5 > 0.6    0.55    103    0.05      128     0.06 
##  0.6 > 0.7    0.65    306    0.14      434     0.20 
##  0.7 > 0.8    0.75    522    0.24      956     0.44 
##  0.8 > 0.9    0.85    534    0.25     1490     0.69 
##  0.9 > 1.0    0.95    371    0.17     1861     0.86 
##  1.0 > 1.1    1.05    183    0.08     2044     0.95 
##  1.1 > 1.2    1.15     48    0.02     2092     0.97 
##  1.2 > 1.3    1.25     21    0.01     2113     0.98 
##  1.3 > 1.4    1.35      6    0.00     2119     0.98 
##  1.4 > 1.5    1.45      2    0.00     2121     0.98 
##  1.5 > 1.6    1.55      1    0.00     2122     0.98 
##

# Two-group t-test of fnbmd across the levels of sex.
ttest(fnbmd ~ sex, data = df)

## 
## Compare fnbmd across sex with levels Male and Female 
## Grouping Variable:  sex
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for sex Male:  n.miss = 23,  n = 822,  mean = 0.910,  sd = 0.153
## fnbmd for sex Female:  n.miss = 17,  n = 1300,  mean = 0.778,  sd = 0.132
## 
## Mean Difference of fnbmd:  0.132
## 
## Weighted Average Standard Deviation:   0.141 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.018 = 1.336,  df = 821;1299,  p-value = 0.000
## Levene's test, Brown-Forsythe:  t = 3.449,  df = 2120,  p-value = 0.001
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## t-cutoff for 95% range of variation: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 21.080,  df = 2120,  p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.012
## 95% Confidence Interval for Mean Difference:  0.120 to 0.144
## 
## 
## --- Do not assume equal population variances of fnbmd for each sex 
## 
## t-cutoff: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t = 20.407,  df = 1560.981, p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.013
## 95% Confidence Interval for Mean Difference:  0.119 to 0.145
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.939
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034

## So sánh mật độ xương giữa người hút thuốc và không hút thuốc ở nhóm nam

# Keep only the rows whose sex is "Male" (rows with a missing sex are dropped),
# then compare fnbmd between the smoking groups within that subset.
is_male <- which(df$sex == "Male")
men <- df[is_male, ]
ttest(fnbmd ~ smoking, data = men)

## 
## Compare fnbmd across smoking with levels 0 and 1 
## Grouping Variable:  smoking
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for smoking 0:  n.miss = 8,  n = 312,  mean = 0.932,  sd = 0.151
## fnbmd for smoking 1:  n.miss = 15,  n = 510,  mean = 0.896,  sd = 0.153
## 
## Mean Difference of fnbmd:  0.036
## 
## Weighted Average Standard Deviation:   0.152 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group 0: Sample mean assumed normal because n > 30, so no test needed.
## Group 1: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.023 = 1.028,  df = 509;311,  p-value = 0.795
## Levene's test, Brown-Forsythe:  t = -1.122,  df = 820,  p-value = 0.262
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each smoking 
## 
## t-cutoff for 95% range of variation: tcut =  1.963 
## Standard Error of Mean Difference: SE =  0.011 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 3.316,  df = 820,  p-value = 0.001
## 
## Margin of Error for 95% Confidence Level:  0.021
## 95% Confidence Interval for Mean Difference:  0.015 to 0.058
## 
## 
## --- Do not assume equal population variances of fnbmd for each smoking 
## 
## t-cutoff: tcut =  1.964 
## Standard Error of Mean Difference: SE =  0.011 
## 
## Hypothesis Test of 0 Mean Diff:  t = 3.327,  df = 664.491, p-value = 0.001
## 
## Margin of Error for 95% Confidence Level:  0.021
## 95% Confidence Interval for Mean Difference:  0.015 to 0.058
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each smoking 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.238
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for smoking 0: 0.043
## Density bandwidth for smoking 1: 0.050

## Mô tả lại mật độ xương và smoking theo giới

# Describe fnbmd and smoking (as a categorical variable) in columns by sex.
table1(
  ~ fnbmd + as.factor(smoking) | sex,
  data = df
)

	Female (N=1317)	Male (N=845)	Overall (N=2162)
fnbmd
Mean (SD)	0.778 (0.132)	0.910 (0.153)	0.829 (0.155)
Median [Min, Max]	0.770 [0.280, 1.31]	0.900 [0.340, 1.51]	0.820 [0.280, 1.51]
Missing	17 (1.3%)	23 (2.7%)	40 (1.9%)
as.factor(smoking)
0	923 (70.1%)	320 (37.9%)	1243 (57.5%)
1	394 (29.9%)	525 (62.1%)	919 (42.5%)

Việc 4. Đánh giá ảnh hưởng của café lên RER(mức độ dao động của mắt)

# Measurements for the two groups being compared (nine values each).
placebo <- c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee <- c(96, 99, 94, 89, 96, 93, 88, 105, 88)

chạy Bootstrap

library(boot); library(simpleboot)

## Simple Bootstrap Routines (1.1-8)

# Fix the RNG seed so the bootstrap replicates, and every summary derived from
# them, can be reproduced from run to run.
set.seed(2025)
# Bootstrap the difference in means between the two samples with two.boot().
b <- two.boot(placebo, coffee, mean, R = 500)
# Confidence intervals for the difference between the two groups. The BCa
# (bias-corrected and accelerated) interval is the one usually reported; it is
# reassuring when the normal, basic and percentile intervals point the same
# way. Studentized intervals are not requested because no bootstrap variances
# are computed here.
boot.ci(b, type = c("norm", "basic", "perc", "bca"))

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 500 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = b)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   ( 0.523, 12.042 )   ( 0.444, 11.444 )  
## 
## Level     Percentile            BCa          
## 95%   ( 1.222, 12.222 )   ( 1.287, 12.271 )  
## Calculations and Intervals on Original Scale

# Histogram of the bootstrap replicates; the x-axis is the difference between
# the two groups. Note the argument name is `breaks` (with an "s").
hist(b, breaks = 50)

# Median of the replicated differences, which are stored in element `t` of b.
median(b[["t"]])

## [1] 6.166667

# First quartile, median and third quartile of the replicated differences.
quantile(b[["t"]], probs = c(0.25, 0.50, 0.75), na.rm = TRUE)

##      25%      50%      75% 
## 4.305556 6.166667 8.444444

# Mean of the replicated differences.
mean(b[["t"]])

## [1] 6.384222

Việc 5. So sánh tỉ lệ gãy xương (fx) giữa nam và nữ (sex)

# Fracture status (fx) as counts and percentages, in columns by sex.
table1(
  ~ as.factor(fx) | sex,
  data = df
)

	Female (N=1317)	Male (N=845)	Overall (N=2162)
as.factor(fx)
0	916 (69.6%)	701 (83.0%)	1617 (74.8%)
1	401 (30.4%)	144 (17.0%)	545 (25.2%)

# Chi-squared test of association between fracture status and sex.
chisq.test(x = df$fx, y = df$sex)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  df$fx and df$sex
## X-squared = 48.363, df = 1, p-value = 0.000000000003542

Ngày 3

Minh Pham

2025-05-12

Việc 2. Phân tích mô tả:

Xem lại: table1(~age+lwt+as.factor(smoke)+as.factor(race)+bwt|low,data=bw, render.continuous = c(.="Mean (SD)",.="Median [Q1 - Q3]",.="Median [min,max]"))

Việc 3. So sánh mật độ xương tại cổ xương đùi giữa nam và nữ

Việc 4. Đánh giá ảnh hưởng của café lên RER(mức độ dao động của mắt)

chạy Bootstrap

Việc 5. So sánh tỉ lệ gãy xương (fx) giữa nam và nữ (sex)